Skip to content

Commit

Permalink
🐛 fix: fix chat error when message has image with non-vision model (#698)

Browse files Browse the repository at this point in the history

* 📝 docs: update docs

* 🐛 fix: fix chat error with non-vision model

close #693
  • Loading branch information
arvinxx committed Dec 17, 2023
1 parent 0364c1e commit b142c17
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 92 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,10 @@ such as automatically fetching the latest news headlines to provide users with i
Moreover, these plugins are not limited to news aggregation but can also extend to other practical functions, such as quick document retrieval,
e-commerce platform data access, and various third-party services.

<video controls src="https://github.com/lobehub/lobe-chat/assets/28616219/f29475a3-f346-4196-a435-41a6373ab9e2" muted="false"></video>

[Learn More →](./docs/Usage/Plugins.md)

<video controls src="https://github.com/lobehub/lobe-chat/assets/28616219/f29475a3-f346-4196-a435-41a6373ab9e2" muted="false"></video>

> \[!TIP]
>
> To aid developers in joining this ecosystem, we provide comprehensive development resources in the [🧩 Plugin System](#-plugins) section.
Expand Down
4 changes: 2 additions & 2 deletions README.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,10 @@ LobeChat 支持文字转语音(Text-to-Speech,TTS)和语音转文字(Spe
LobeChat 的插件生态系统是其核心功能的重要扩展,它极大地增强了 ChatGPT 的实用性和灵活性。通过利用插件,ChatGPT 能够实现实时信息的获取和处理,例如自动获取最新新闻头条,为用户提供即时且相关的资讯。
此外,这些插件不仅局限于新闻聚合,还可以扩展到其他实用的功能,如快速检索文档、获取电商平台数据、以及其他各式各样的第三方服务。

<video controls src="https://github.com/lobehub/lobe-chat/assets/28616219/f29475a3-f346-4196-a435-41a6373ab9e2" muted="false"></video>

[了解更多 →](./docs/Usage/Plugins.zh-CN.md)

<video controls src="https://github.com/lobehub/lobe-chat/assets/28616219/f29475a3-f346-4196-a435-41a6373ab9e2" muted="false"></video>

> \[!TIP]
>
> 为了帮助开发者更好地参与到这个生态中来,我们在 [🧩 插件体系](#-插件体系) 部分提供了全面的开发资源。
Expand Down
216 changes: 132 additions & 84 deletions src/services/__tests__/chat.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,102 +84,150 @@ describe('ChatService', () => {
);
});

it('should correctly process messages and handle content for vision models', async () => {
const messages = [
{ content: 'Hello', role: 'user', files: ['file1'] }, // Message with files
{ content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role
{ content: 'Hey', role: 'assistant' }, // Regular user message
] as ChatMessage[];

// Mock file store state to return a specific image URL or Base64 for the given files
act(() => {
useFileStore.setState({
imagesMap: {
file1: {
name: 'abc.png',
saveMode: 'url',
fileType: 'image/png',
url: 'http://example.com/image.jpg',
describe('should handle content correctly for vision models', () => {
it('should include image content when with vision model', async () => {
const messages = [
{ content: 'Hello', role: 'user', files: ['file1'] }, // Message with files
{ content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role
{ content: 'Hey', role: 'assistant' }, // Regular user message
] as ChatMessage[];

// Mock file store state to return a specific image URL or Base64 for the given files
act(() => {
useFileStore.setState({
imagesMap: {
file1: {
name: 'abc.png',
saveMode: 'url',
fileType: 'image/png',
url: 'http://example.com/image.jpg',
},
},
},
});
});

const getChatCompletionSpy = vi.spyOn(chatService, 'getChatCompletion');
await chatService.createAssistantMessage({
messages,
plugins: [],
model: 'gpt-4-vision-preview',
});

expect(getChatCompletionSpy).toHaveBeenCalledWith(
{
messages: [
{
content: [
{ text: 'Hello', type: 'text' },
{
image_url: { detail: 'auto', url: 'http://example.com/image.jpg' },
type: 'image_url',
},
],
role: 'user',
},
{
content: 'Hi',
name: 'plugin1',
role: 'function',
},
{
content: 'Hey',
role: 'assistant',
},
],
model: 'gpt-4-vision-preview',
},
undefined,
);
});

const getChatCompletionSpy = vi.spyOn(chatService, 'getChatCompletion');
await chatService.createAssistantMessage({ messages, plugins: [] });
it('should not include image content when default model', async () => {
const messages = [
{ content: 'Hello', role: 'user', files: ['file1'] }, // Message with files
{ content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role
{ content: 'Hey', role: 'assistant' }, // Regular user message
] as ChatMessage[];

expect(getChatCompletionSpy).toHaveBeenCalledWith(
{
messages: [
{
content: [
{ text: 'Hello', type: 'text' },
{
image_url: { detail: 'auto', url: 'http://example.com/image.jpg' },
type: 'image_url',
},
],
role: 'user',
},
{
content: 'Hi',
name: 'plugin1',
role: 'function',
},
{
content: 'Hey',
role: 'assistant',
// Mock file store state to return a specific image URL or Base64 for the given files
act(() => {
useFileStore.setState({
imagesMap: {
file1: {
name: 'abc.png',
saveMode: 'url',
fileType: 'image/png',
url: 'http://example.com/image.jpg',
},
},
],
},
undefined,
);
});
});
});

it('should correctly process messages and handle content for vision models', async () => {
const messages = [
{ content: 'Hello', role: 'user', files: ['file2'] }, // Message with files
{ content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role
{ content: 'Hey', role: 'assistant' }, // Regular user message
] as ChatMessage[];

// Mock file store state to return a specific image URL or Base64 for the given files
act(() => {
useFileStore.setState({
imagesMap: {
file1: {
name: 'abc.png',
saveMode: 'url',
fileType: 'image/png',
url: 'http://example.com/image.jpg',
},
},
const getChatCompletionSpy = vi.spyOn(chatService, 'getChatCompletion');
await chatService.createAssistantMessage({
messages,
plugins: [],
model: 'gpt-3.5-turbo',
});

expect(getChatCompletionSpy).toHaveBeenCalledWith(
{
messages: [
{ content: 'Hello', role: 'user' },
{ content: 'Hi', name: 'plugin1', role: 'function' },
{ content: 'Hey', role: 'assistant' },
],
model: 'gpt-3.5-turbo',
},
undefined,
);
});

const getChatCompletionSpy = vi.spyOn(chatService, 'getChatCompletion');
await chatService.createAssistantMessage({ messages, plugins: [] });
it('should not include image with vision models when can not find the image', async () => {
const messages = [
{ content: 'Hello', role: 'user', files: ['file2'] }, // Message with files
{ content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role
{ content: 'Hey', role: 'assistant' }, // Regular user message
] as ChatMessage[];

expect(getChatCompletionSpy).toHaveBeenCalledWith(
{
messages: [
{
content: 'Hello',
role: 'user',
},
{
content: 'Hi',
name: 'plugin1',
role: 'function',
},
{
content: 'Hey',
role: 'assistant',
// Mock file store state to return a specific image URL or Base64 for the given files
act(() => {
useFileStore.setState({
imagesMap: {
file1: {
name: 'abc.png',
saveMode: 'url',
fileType: 'image/png',
url: 'http://example.com/image.jpg',
},
},
],
},
undefined,
);
});
});

const getChatCompletionSpy = vi.spyOn(chatService, 'getChatCompletion');
await chatService.createAssistantMessage({ messages, plugins: [] });

expect(getChatCompletionSpy).toHaveBeenCalledWith(
{
messages: [
{
content: 'Hello',
role: 'user',
},
{
content: 'Hi',
name: 'plugin1',
role: 'function',
},
{
content: 'Hey',
role: 'assistant',
},
],
},
undefined,
);
});
});

describe('with tools messages', () => {
Expand Down
25 changes: 21 additions & 4 deletions src/services/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import { fetchAIFactory, getMessageError } from '@/utils/fetch';
import { createHeaderWithOpenAI } from './_header';
import { OPENAI_URLS, URLS } from './_url';

const isVisionModel = (model?: string) => model && VISION_MODEL_WHITE_LIST.includes(model);

interface FetchOptions {
signal?: AbortSignal | undefined;
}
Expand All @@ -38,7 +40,11 @@ class ChatService {
);
// ============ 1. preprocess messages ============ //

const oaiMessages = this.processMessages(messages, enabledPlugins);
const oaiMessages = this.processMessages({
messages,
model: payload.model,
tools: enabledPlugins,
});

// ============ 2. preprocess tools ============ //

Expand All @@ -48,8 +54,7 @@ class ChatService {
// 1. tools is not empty
// 2. model is not in vision white list, because vision model can't use tools
// TODO: we need to find some method to let vision model use tools
const shouldUseTools =
filterTools.length > 0 && !VISION_MODEL_WHITE_LIST.includes(payload.model);
const shouldUseTools = filterTools.length > 0 && !isVisionModel(payload.model);

const functions = shouldUseTools ? filterTools : undefined;

Expand Down Expand Up @@ -103,7 +108,15 @@ class ChatService {

fetchPresetTaskResult = fetchAIFactory(this.getChatCompletion);

private processMessages = (messages: ChatMessage[], tools?: string[]): OpenAIChatMessage[] => {
private processMessages = ({
messages,
tools,
model,
}: {
messages: ChatMessage[];
model?: string;
tools?: string[];
}): OpenAIChatMessage[] => {
// handle content type for vision model
// for the models with visual ability, add image url to content
// refs: https://platform.openai.com/docs/guides/vision/quick-start
Expand All @@ -114,6 +127,10 @@ class ChatService {

if (imageList.length === 0) return m.content;

if (!isVisionModel(model)) {
return m.content;
}

return [
{ text: m.content, type: 'text' },
...imageList.map(
Expand Down

0 comments on commit b142c17

Please sign in to comment.