From 5d11c4f9699844d86d9512fedc5b10ea92d8e223 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Tue, 7 May 2024 10:00:43 +0000 Subject: [PATCH 01/24] =?UTF-8?q?=E2=9C=A8=20feat:=20finish=20OpenAI=20str?= =?UTF-8?q?eam=20tool=5Fcalls=20protocol?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 1 + src/database/client/schemas/message.ts | 12 +- src/features/Conversation/Actions/index.ts | 1 + .../Conversation/Messages/Assistant.tsx | 26 --- .../Messages/Assistant/ToolCalls/Settings.tsx | 39 +++++ .../Messages/Assistant/ToolCalls/index.tsx | 72 ++++++++ .../Messages/Assistant/ToolCalls/style.ts | 25 +++ .../Conversation/Messages/Assistant/index.tsx | 36 ++++ .../Conversation/Messages/Tool/index.tsx | 39 +++++ src/features/Conversation/Messages/index.ts | 2 + .../components/ChatItem/index.tsx | 4 + src/libs/agent-runtime/types/chat.ts | 5 +- .../agent-runtime/utils/anthropicHelpers.ts | 5 +- .../utils/openaiCompatibleFactory/index.ts | 5 +- src/libs/agent-runtime/utils/response.ts | 12 ++ src/libs/agent-runtime/utils/streams/index.ts | 2 + .../agent-runtime/utils/streams/openai.ts | 26 +++ .../agent-runtime/utils/streams/protocol.ts | 41 +++++ src/locales/default/plugin.ts | 2 + src/services/chat.ts | 79 +++++---- src/store/chat/slices/message/action.test.ts | 11 -- src/store/chat/slices/message/action.ts | 126 ++++++-------- src/store/chat/slices/message/reducer.ts | 19 +- src/store/chat/slices/message/selectors.ts | 10 +- src/store/chat/slices/plugin/action.test.ts | 24 +-- src/store/chat/slices/plugin/action.ts | 163 +++++++++--------- src/types/llm.ts | 2 +- src/types/message/index.ts | 15 +- src/types/message/tools.ts | 57 ++++++ src/types/openai/chat.ts | 6 + src/utils/fetch.test.ts | 93 +++++++++- src/utils/fetch.ts | 140 ++++++++++----- 32 files changed, 795 insertions(+), 305 deletions(-) delete mode 100644 src/features/Conversation/Messages/Assistant.tsx create mode 100644 src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx create mode 100644 src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx create mode 100644 src/features/Conversation/Messages/Assistant/ToolCalls/style.ts create mode 100644 src/features/Conversation/Messages/Assistant/index.tsx create mode 100644 src/features/Conversation/Messages/Tool/index.tsx create mode 100644 src/libs/agent-runtime/utils/response.ts create mode 100644 src/libs/agent-runtime/utils/streams/index.ts create mode 100644 src/libs/agent-runtime/utils/streams/openai.ts create mode 100644 src/libs/agent-runtime/utils/streams/protocol.ts diff --git a/package.json b/package.json index 62798c97c1ac..3ddfca595c23 100644 --- a/package.json +++ b/package.json @@ -97,6 +97,7 @@ "@lobehub/icons": "latest", "@lobehub/tts": "latest", "@lobehub/ui": "^1.138.17", + "@microsoft/fetch-event-source": "^2.0.1", "@next/third-parties": "^14.2.3", "@sentry/nextjs": "^7.112.2", "@t3-oss/env-nextjs": "^0.10.1", diff --git a/src/database/client/schemas/message.ts b/src/database/client/schemas/message.ts index f198e0159529..288606953581 100644 --- a/src/database/client/schemas/message.ts +++ b/src/database/client/schemas/message.ts @@ -1,6 +1,8 @@ /* eslint-disable sort-keys-fix/sort-keys-fix */ import { z } from 'zod'; +import { MessageToolCallSchema } from '@/types/message'; + const TranslateSchema = z.object({ from: z.string().optional(), to: z.string(), @@ -14,13 +16,21 @@ const PluginSchema = z.object({ type: z.enum(['default', 'markdown', 'standalone', 'builtin']).default('default'), 
 });
 
+const ToolCallSchema = PluginSchema.extend({
+  id: z.string(),
+});
+
 export const DB_MessageSchema = z.object({
-  role: z.enum(['user', 'system', 'assistant', 'function']),
+  role: z.enum(['user', 'system', 'assistant', 'function', 'tool']),
   content: z.string(),
   files: z.array(z.string()).optional(),
   favorite: z.number().int().min(0).max(1).optional(),
   error: z.any().optional(),
 
+  tool_calls: z.array(MessageToolCallSchema).optional(),
+  tools: z.array(ToolCallSchema).optional(),
+  tool: ToolCallSchema.optional(),
+
   plugin: PluginSchema.optional(),
   pluginState: z.any().optional(),
   fromModel: z.string().optional(),
diff --git a/src/features/Conversation/Actions/index.ts b/src/features/Conversation/Actions/index.ts
index 72c7f1724b9f..71feebab93b2 100644
--- a/src/features/Conversation/Actions/index.ts
+++ b/src/features/Conversation/Actions/index.ts
@@ -15,6 +15,7 @@ export const renderActions: Record<string, RenderAction> = {
   assistant: AssistantActionsBar,
   function: FunctionActionsBar,
   system: DefaultActionsBar,
+  tool: FunctionActionsBar,
   user: UserActionsBar,
 };
 
diff --git a/src/features/Conversation/Messages/Assistant.tsx b/src/features/Conversation/Messages/Assistant.tsx
deleted file mode 100644
index 6a64f124aa6c..000000000000
--- a/src/features/Conversation/Messages/Assistant.tsx
+++ /dev/null
@@ -1,26 +0,0 @@
-import { ReactNode, memo } from 'react';
-
-import { isFunctionMessageAtStart } from '@/const/message';
-import { useChatStore } from '@/store/chat';
-import { chatSelectors } from '@/store/chat/selectors';
-import { ChatMessage } from '@/types/message';
-
-import Inspector from '../Plugins/Inspector';
-import { DefaultMessage } from './Default';
-
-export const AssistantMessage = memo<
-  ChatMessage & {
-    editableContent: ReactNode;
-  }
->(({ id, plugin, content, ...props }) => {
-  const fcProps = useChatStore(chatSelectors.getFunctionMessageProps({ content, id, plugin }));
-
-  if (!isFunctionMessageAtStart(content))
-    return <DefaultMessage content={content} id={id} {...props} />;
-
-  return (
-    <div id={id}>
-      <Inspector {...fcProps} />
-    </div>
-  );
-});
diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx b/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx
new file mode 100644
index 000000000000..e9183dfe24cf
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx
@@ -0,0 +1,39 @@
+import { ActionIcon } from '@lobehub/ui';
+import { LucideSettings } from 'lucide-react';
+import { memo, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+
+import PluginDetailModal from '@/features/PluginDetailModal';
+import { pluginHelpers, useToolStore } from '@/store/tool';
+import { pluginSelectors } from '@/store/tool/selectors';
+
+const Settings = memo<{ id: string }>(({ id }) => {
+  const item = useToolStore(pluginSelectors.getPluginManifestById(id));
+  const [open, setOpen] = useState(false);
+  const { t } = useTranslation('plugin');
+  const hasSettings = pluginHelpers.isSettingSchemaNonEmpty(item?.settings);
+
+  return (
+    hasSettings && (
+      <>
+        <ActionIcon
+          icon={LucideSettings}
+          onClick={() => {
+            setOpen(true);
+          }}
+          title={t('setting')}
+        />
+        <PluginDetailModal
+          id={id}
+          onClose={() => {
+            setOpen(false);
+          }}
+          open={open}
+          schema={item?.settings}
+        />
+      </>
+    )
+  );
+});
+
+export default Settings;
diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx
new file mode 100644
index 000000000000..681778525e1f
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx
@@ -0,0 +1,72 @@
+import { Loading3QuartersOutlined } from '@ant-design/icons';
+import { Avatar, Highlighter, Icon } from '@lobehub/ui';
+import isEqual from 'fast-deep-equal';
+import { LucideChevronDown, LucideChevronUp, LucideToyBrick } from 'lucide-react';
+import { memo, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { Flexbox } from 'react-layout-kit';
+
+import { pluginHelpers, useToolStore } from '@/store/tool';
+import { toolSelectors } from '@/store/tool/selectors';
+
+import { useStyles } from './style';
+
+export interface InspectorProps {
+  arguments?: string;
+  identifier: string;
+  loading?: boolean;
+}
+
+const Inspector = memo(({ arguments: requestArgs = '{}', loading, identifier }) => {
+  const { t } = useTranslation('plugin');
+  const { styles } = useStyles();
+  const [open, setOpen] = useState(false);
+
+  const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual);
+
+  const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta);
+
+  const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('plugins.loading');
+
+  const avatar = pluginAvatar ? (
+    <Avatar avatar={pluginAvatar} size={24} />
+  ) : (
+    <Icon icon={LucideToyBrick} />
+  );
+
+  let params;
+  try {
+    params = JSON.stringify(JSON.parse(requestArgs), null, 2);
+  } catch {
+    params = requestArgs;
+  }
+
+  return (
+    <Flexbox gap={8}>
+      <Flexbox
+        align={'center'}
+        className={styles.container}
+        gap={8}
+        horizontal
+        onClick={() => {
+          setOpen(!open);
+        }}
+      >
+        {loading ? (
+          <div>
+            <Icon icon={Loading3QuartersOutlined} spin />
+          </div>
+        ) : (
+          avatar
+        )}
+        {pluginTitle}
+        <Icon icon={open ? LucideChevronUp : LucideChevronDown} />
+      </Flexbox>
+      {open && <Highlighter language={'json'}>{params}</Highlighter>}
+    </Flexbox>
+  );
+});
+
+export default Inspector;
diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/style.ts b/src/features/Conversation/Messages/Assistant/ToolCalls/style.ts
new file mode 100644
index 000000000000..3cef48295aa4
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/ToolCalls/style.ts
@@ -0,0 +1,25 @@
+import { createStyles } from 'antd-style';
+
+export const useStyles = createStyles(({ css, token }) => ({
+  container: css`
+    cursor: pointer;
+
+    width: fit-content;
+    padding-inline: 4px 6px;
+
+    color: ${token.colorText};
+
+    background: ${token.colorFillTertiary};
+    border-radius: 8px;
+
+    &:hover {
+      background: ${token.colorFillSecondary};
+    }
+  `,
+  plugin: css`
+    display: flex;
+    gap: 4px;
+    align-items: center;
+    width: fit-content;
+  `,
+}));
diff --git a/src/features/Conversation/Messages/Assistant/index.tsx b/src/features/Conversation/Messages/Assistant/index.tsx
new file mode 100644
index 000000000000..d9ed3b78fb98
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/index.tsx
@@ -0,0 +1,36 @@
+import { ReactNode, memo } from 'react';
+import { Flexbox } from 'react-layout-kit';
+
+import { useChatStore } from '@/store/chat';
+import { chatSelectors } from '@/store/chat/selectors';
+import { ChatMessage } from '@/types/message';
+
+import { DefaultMessage } from '../Default';
+import ToolCalls from './ToolCalls';
+
+export const AssistantMessage = memo<
+  ChatMessage & {
+    editableContent: ReactNode;
+  }
+>(({ id, tools, content, ...props }) => {
+  const editing = useChatStore(chatSelectors.isMessageEditing(id));
+
+  return (
+    <Flexbox gap={8} id={id}>
+      {(content || editing) && (
+        <DefaultMessage content={content} id={id} {...props} />
+      )}
+      {!editing && (
+        <Flexbox gap={8}>
+          {tools?.map((toolCall) => (
+            <ToolCalls
+              arguments={toolCall.arguments}
+              identifier={toolCall.identifier}
+              key={toolCall.id}
+            />
+          ))}
+        </Flexbox>
+      )}
+    </Flexbox>
+  );
+});
diff --git a/src/features/Conversation/Messages/Tool/index.tsx b/src/features/Conversation/Messages/Tool/index.tsx
new file mode 100644
index 000000000000..86683a46cca9
--- /dev/null
+++ b/src/features/Conversation/Messages/Tool/index.tsx
@@ -0,0 +1,39 @@
+import isEqual from 'fast-deep-equal';
+import { memo, useState } from 'react';
+import { Flexbox } from 'react-layout-kit';
+
+import { LOADING_FLAT } from '@/const/message';
+import { useChatStore } from '@/store/chat';
+import { chatSelectors } from '@/store/chat/selectors';
+import { ChatMessage } from '@/types/message';
+
+import Inspector from '../../Plugins/Inspector';
+import PluginRender from '../../Plugins/Render';
+import BubblesLoading from '../../components/BubblesLoading';
+
+export const ToolMessage = memo<ChatMessage>(({ id, content, tool }) => {
+  const fcProps = useChatStore(
+    chatSelectors.getFunctionMessageProps({ content, id, plugin: tool }),
+    isEqual,
+  );
+
+  const [showRender, setShow] = useState(true);
+
+  if (content === LOADING_FLAT) return <BubblesLoading />;
+
+  return (
+    <Flexbox gap={12} id={id}>
+      <Inspector setShow={setShow} showRender={showRender} {...fcProps} />
+      {showRender && (
+        <PluginRender
+          content={content}
+          id={id}
+          identifier={tool?.identifier}
+          loading={fcProps.loading}
+          payload={tool}
+          type={tool?.type}
+        />
+      )}
+    </Flexbox>
+  );
+});
diff --git a/src/features/Conversation/Messages/index.ts b/src/features/Conversation/Messages/index.ts
index 273223593c66..7a0469eef0c9 100644
--- a/src/features/Conversation/Messages/index.ts
+++ b/src/features/Conversation/Messages/index.ts
@@ -7,12 +7,14 @@ import { OnAvatarsClick, RenderMessage } from '../types';
 import { AssistantMessage } from './Assistant';
 import { DefaultMessage } from './Default';
 import { FunctionMessage } from './Function';
+import { ToolMessage } from './Tool';
 import { UserMessage } from './User';
 
 export const renderMessages: Record<string, RenderMessage> = {
   assistant: AssistantMessage,
   default: DefaultMessage,
   function: FunctionMessage,
+  tool: ToolMessage,
   user: UserMessage,
 };
 
diff --git 
a/src/features/Conversation/components/ChatItem/index.tsx b/src/features/Conversation/components/ChatItem/index.tsx index 30eecbf31835..89545ee06fea 100644 --- a/src/features/Conversation/components/ChatItem/index.tsx +++ b/src/features/Conversation/components/ChatItem/index.tsx @@ -56,6 +56,10 @@ const Item = memo(({ index, id }) => { }, isEqual); const historyLength = useChatStore((s) => chatSelectors.currentChats(s).length); + const [editing, toggleMessageEditing] = useChatStore((s) => [ + chatSelectors.isMessageEditing(id)(s), + s.toggleMessageEditing, + ]); const [isMessageLoading, generating, editing, toggleMessageEditing, updateMessageContent] = useChatStore((s) => [ diff --git a/src/libs/agent-runtime/types/chat.ts b/src/libs/agent-runtime/types/chat.ts index a32951793aed..b7b5eb918d1d 100644 --- a/src/libs/agent-runtime/types/chat.ts +++ b/src/libs/agent-runtime/types/chat.ts @@ -1,6 +1,8 @@ import { OpenAIStreamCallbacks } from 'ai'; -export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function'; +import { MessageToolCall } from '@/types/message'; + +export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function' | 'tool'; interface UserMessageContentPartText { text: string; @@ -30,6 +32,7 @@ export interface OpenAIChatMessage { * @description 消息发送者的角色 */ role: LLMRoleType; + tool_calls?: MessageToolCall[]; } /** diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.ts b/src/libs/agent-runtime/utils/anthropicHelpers.ts index 9a1bab628737..0528f296a38f 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.ts @@ -32,7 +32,10 @@ export const buildAnthropicMessage = ( const content = message.content as string | UserMessageContentPart[]; return { content: typeof content === 'string' ? content : content.map((c) => buildAnthropicBlock(c)), - role: message.role === 'function' || message.role === 'system' ? 'assistant' : message.role, + role: + message.role === 'tool' || message.role === 'function' || message.role === 'system' + ? 
'assistant' + : message.role, }; }; diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts index 89a4c8ec2fe6..1c58bf5b1723 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts @@ -1,4 +1,3 @@ -import { OpenAIStream, StreamingTextResponse } from 'ai'; import OpenAI, { ClientOptions } from 'openai'; import { LOBE_DEFAULT_MODEL_LIST } from '@/config/modelProviders'; @@ -11,6 +10,8 @@ import { AgentRuntimeError } from '../createError'; import { debugStream } from '../debugStream'; import { desensitizeUrl } from '../desensitizeUrl'; import { handleOpenAIError } from '../handleOpenAIError'; +import { StreamingResponse } from '../response'; +import { OpenAIStream } from '../streams'; // the model contains the following keywords is not a chat model, so we should filter them out const CHAT_MODELS_BLOCK_LIST = [ @@ -86,7 +87,7 @@ export const LobeOpenAICompatibleFactory = ({ debugStream(useForDebug.toReadableStream()).catch(console.error); } - return new StreamingTextResponse(OpenAIStream(prod, options?.callback), { + return StreamingResponse(OpenAIStream(prod, options?.callback), { headers: options?.headers, }); } catch (error) { diff --git a/src/libs/agent-runtime/utils/response.ts b/src/libs/agent-runtime/utils/response.ts new file mode 100644 index 000000000000..f8de781c9e2d --- /dev/null +++ b/src/libs/agent-runtime/utils/response.ts @@ -0,0 +1,12 @@ +export const StreamingResponse = ( + stream: ReadableStream, + options?: { headers?: Record }, +) => { + return new Response(stream, { + headers: { + 'Cache-Control': 'no-cache', + 'Content-Type': 'text/event-stream', + ...options?.headers, + }, + }); +}; diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts new file mode 100644 index 000000000000..0a8bed033dba --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -0,0 +1,2 @@ +export * from './openai'; +export * from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts new file mode 100644 index 000000000000..dd8010fffe36 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -0,0 +1,26 @@ +import { createCallbacksTransformer } from 'ai'; +import OpenAI from 'openai'; +import type { Stream } from 'openai/streaming'; + +import { ChatStreamCallbacks } from '../../types'; +import { transformOpenAIStream } from './protocol'; + +export const OpenAIStream = ( + stream: Stream, + callbacks?: ChatStreamCallbacks, +) => { + return stream + .toReadableStream() + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformOpenAIStream(chunk); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts new file mode 100644 index 000000000000..d2a5b873147f --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -0,0 +1,41 @@ +import OpenAI from 'openai'; + +interface StreamProtocolChunk { + data: any; + id: string; + type: 'text' | 'tool_calls' | 'data' | 'stop'; +} + +export const transformOpenAIStream = (chunk: Uint8Array): StreamProtocolChunk => { + 
const decoder = new TextDecoder(); + + const chunkValue = decoder.decode(chunk, { stream: true }); + const jsonValue: OpenAI.ChatCompletionChunk = JSON.parse(chunkValue); + + // maybe need another structure to add support for multiple choices + const item = jsonValue.choices[0]; + + if (typeof item.delta.content === 'string') { + return { data: item.delta.content, id: jsonValue.id, type: 'text' }; + } + + if (item.delta.tool_calls) { + return { data: item.delta.tool_calls, id: jsonValue.id, type: 'tool_calls' }; + } + + if (item.delta.content === null) { + return { data: item.delta, id: jsonValue.id, type: 'data' }; + } + + // 给定结束原因 + if (item.finish_reason) { + return { data: item.finish_reason, id: jsonValue.id, type: 'stop' }; + } + + // 其余情况下,返回 delta 和 index + return { + data: { delta: item.delta, id: jsonValue.id, index: item.index }, + id: jsonValue.id, + type: 'data', + }; +}; diff --git a/src/locales/default/plugin.ts b/src/locales/default/plugin.ts index 25b4a954b244..cd8a58857183 100644 --- a/src/locales/default/plugin.ts +++ b/src/locales/default/plugin.ts @@ -4,7 +4,9 @@ export default { function_call: '函数调用', off: '关闭调试', on: '查看插件调用信息', + payload: '插件载荷', response: '返回结果', + tool_call: '工具调用请求', }, detailModal: { info: { diff --git a/src/services/chat.ts b/src/services/chat.ts index 2198a063155d..9af430a7f940 100644 --- a/src/services/chat.ts +++ b/src/services/chat.ts @@ -24,13 +24,13 @@ import { ChatErrorType } from '@/types/fetch'; import { ChatMessage } from '@/types/message'; import type { ChatStreamPayload, OpenAIChatMessage } from '@/types/openai/chat'; import { UserMessageContentPart } from '@/types/openai/chat'; -import { FetchSSEOptions, OnFinishHandler, fetchSSE, getMessageError } from '@/utils/fetch'; +import { FetchSSEOptions, fetchSSE, getMessageError } from '@/utils/fetch'; import { createTraceHeader, getTraceId } from '@/utils/trace'; import { createHeaderWithAuth, getProviderAuthPayload } from './_auth'; import { API_ENDPOINTS } from './_url'; -interface FetchOptions { +interface FetchOptions extends FetchSSEOptions { isWelcomeQuestion?: boolean; signal?: AbortSignal | undefined; trace?: TracePayload; @@ -40,23 +40,14 @@ interface GetChatCompletionPayload extends Partial void; - onFinish?: OnFinishHandler; /** * 加载状态变化处理函数 * @param loading - 是否处于加载状态 */ onLoadingChange?: (loading: boolean) => void; - /** - * 消息处理函数 - * @param text - 消息内容 - */ - onMessageHandle?: (text: string) => void; /** * 请求对象 */ @@ -224,20 +215,15 @@ class ChatService { trace, isWelcomeQuestion, }: CreateAssistantMessageStream) => { - await fetchSSE( - () => - this.createAssistantMessage(params, { - isWelcomeQuestion, - signal: abortController?.signal, - trace: this.mapTrace(trace, TraceTagMap.Chat), - }), - { - onAbort, - onErrorHandle, - onFinish, - onMessageHandle, - }, - ); + await this.createAssistantMessage(params, { + isWelcomeQuestion, + onAbort, + onErrorHandle, + onFinish, + onMessageHandle, + signal: abortController?.signal, + trace: this.mapTrace(trace, TraceTagMap.Chat), + }); }; getChatCompletion = async (params: Partial, options?: FetchOptions) => { @@ -299,10 +285,14 @@ class ChatService { provider, }); - return fetch(API_ENDPOINTS.chat(provider), { + return fetchSSE(API_ENDPOINTS.chat(provider), { body: JSON.stringify(payload), headers, method: 'POST', + onAbort: options?.onAbort, + onErrorHandle: options?.onErrorHandle, + onFinish: options?.onFinish, + onMessageHandle: options?.onMessageHandle, signal, }); }; @@ -360,20 +350,15 @@ class ChatService { 
onLoadingChange?.(true); - const data = await fetchSSE( - () => - this.getChatCompletion(params, { - signal: abortController?.signal, - trace: this.mapTrace(trace, TraceTagMap.SystemChain), - }), - { - onErrorHandle: (error) => { - errorHandle(new Error(error.message), error); - }, - onFinish, - onMessageHandle, + const data = await this.getChatCompletion(params, { + onErrorHandle: (error) => { + errorHandle(new Error(error.message), error); }, - ).catch(errorHandle); + onFinish, + onMessageHandle, + signal: abortController?.signal, + trace: this.mapTrace(trace, TraceTagMap.SystemChain), + }).catch(errorHandle); onLoadingChange?.(false); @@ -424,11 +409,25 @@ class ChatService { return { content: getContent(m), role: m.role }; } + case 'assistant': { + return { content: m.content, role: m.role, tool_calls: m.tool_calls }; + } + + // TODO: need to be removed after upgrade case 'function': { const name = m.plugin?.identifier as string; return { content: m.content, name, role: m.role }; } + case 'tool': { + return { + content: m.content, + name: m.tool_calls?.find((tool) => tool.id === m.tool?.id)?.function.name, + role: m.role, + tool_call_id: m.tool?.id, + }; + } + default: { return { content: m.content, role: m.role }; } diff --git a/src/store/chat/slices/message/action.test.ts b/src/store/chat/slices/message/action.test.ts index a6355ba96850..d6d6f634ac3d 100644 --- a/src/store/chat/slices/message/action.test.ts +++ b/src/store/chat/slices/message/action.test.ts @@ -645,10 +645,7 @@ describe('chatMessage actions', () => { messages, assistantMessageId, ); - expect(response.content).toEqual(aiResponse); expect(response.isFunctionCall).toEqual(false); - expect(response.functionCallAtEnd).toEqual(false); - expect(response.functionCallContent).toEqual(''); }); }); @@ -667,10 +664,7 @@ describe('chatMessage actions', () => { messages, assistantMessageId, ); - expect(response.content).toEqual(aiResponse); expect(response.isFunctionCall).toEqual(true); - expect(response.functionCallAtEnd).toEqual(false); - expect(response.functionCallContent).toEqual(''); }); }); @@ -689,12 +683,7 @@ describe('chatMessage actions', () => { messages, assistantMessageId, ); - expect(response.content).toEqual(aiResponse); expect(response.isFunctionCall).toEqual(true); - expect(response.functionCallAtEnd).toEqual(true); - expect(response.functionCallContent).toEqual( - '{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]}', - ); }); }); diff --git a/src/store/chat/slices/message/action.ts b/src/store/chat/slices/message/action.ts index b256bcb89b81..1409db34a38d 100644 --- a/src/store/chat/slices/message/action.ts +++ b/src/store/chat/slices/message/action.ts @@ -6,7 +6,7 @@ import { template } from 'lodash-es'; import { SWRResponse, mutate } from 'swr'; import { StateCreator } from 'zustand/vanilla'; -import { LOADING_FLAT, isFunctionMessageAtStart, testFunctionMessageAtEnd } from '@/const/message'; +import { LOADING_FLAT } from '@/const/message'; import { TraceEventType, TraceNameMap } from '@/const/trace'; import { useClientDataSWR } from '@/libs/swr'; import { chatService } from '@/services/chat'; @@ -17,7 +17,7 @@ import { useAgentStore } from '@/store/agent'; import { agentSelectors } from '@/store/agent/selectors'; import { chatHelpers } from '@/store/chat/helpers'; import { ChatStore } from '@/store/chat/store'; -import { ChatMessage } from '@/types/message'; +import { ChatMessage, MessageToolCall } from '@/types/message'; import { 
TraceEventPayloads } from '@/types/trace'; import { setNamespace } from '@/utils/storeDebug'; import { nanoid } from '@/utils/uuid'; @@ -105,9 +105,6 @@ export interface ChatMessageAction { assistantMessageId: string, params?: ProcessMessageParams, ) => Promise<{ - content: string; - functionCallAtEnd: boolean; - functionCallContent: string; isFunctionCall: boolean; traceId?: string; }>; @@ -123,7 +120,11 @@ export interface ChatMessageAction { * @param id * @param content */ - internal_updateMessageContent: (id: string, content: string) => Promise; + internal_updateMessageContent: ( + id: string, + content: string, + toolCalls?: MessageToolCall[], + ) => Promise; internal_createMessage: (params: CreateMessageParams) => Promise; internal_resendMessage: (id: string, traceId?: string) => Promise; internal_traceMessage: (id: string, payload: TraceEventPayloads) => Promise; @@ -310,8 +311,7 @@ export const chatMessage: StateCreator< // the internal process method of the AI message internal_coreProcessMessage: async (messages, userMessageId, params) => { - const { internal_fetchAIChatMessage, triggerFunctionCall, refreshMessages, activeTopicId } = - get(); + const { internal_fetchAIChatMessage, triggerToolCalls, refreshMessages, activeTopicId } = get(); const { model, provider } = getAgentConfig(); @@ -327,39 +327,15 @@ export const chatMessage: StateCreator< topicId: activeTopicId, // if there is activeTopicId,then add it to topicId }; - const mid = await get().internal_createMessage(assistantMessage); + const assistantId = await get().internal_createMessage(assistantMessage); // 2. fetch the AI response - const { isFunctionCall, content, functionCallAtEnd, functionCallContent, traceId } = - await internal_fetchAIChatMessage(messages, mid, params); + const { isFunctionCall } = await internal_fetchAIChatMessage(messages, assistantId, params); // 3. 
if it's the function call message, trigger the function method if (isFunctionCall) { - let functionId = mid; - - // if the function call is at the end of the message, then create a new function message - if (functionCallAtEnd) { - // create a new separate message and remove the function call from the prev message - - await get().internal_updateMessageContent(mid, content.replace(functionCallContent, '')); - - const functionMessage: CreateMessageParams = { - role: 'function', - content: functionCallContent, - fromModel: model, - fromProvider: provider, - - parentId: userMessageId, - sessionId: get().activeId, - topicId: activeTopicId, - traceId, - }; - - functionId = await get().internal_createMessage(functionMessage); - } - await refreshMessages(); - await triggerFunctionCall(functionId); + await triggerToolCalls(assistantId); } }, internal_dispatchMessage: (payload) => { @@ -369,7 +345,7 @@ export const chatMessage: StateCreator< const messages = messagesReducer(get().messages, payload); - set({ messages }, false, n(`dispatchMessage/${payload.type}`, payload)); + set({ messages }, false, { type: `dispatchMessage/${payload.type}`, payload }); }, internal_fetchAIChatMessage: async (messages, assistantId, params) => { const { @@ -432,10 +408,7 @@ export const chatMessage: StateCreator< config.params.max_tokens = 2048; } - let output = ''; let isFunctionCall = false; - let functionCallAtEnd = false; - let functionCallContent = ''; let msgTraceId: string | undefined; const { startAnimation, stopAnimation, outputQueue, isAnimationActive } = @@ -464,7 +437,7 @@ export const chatMessage: StateCreator< onAbort: async () => { stopAnimation(); }, - onFinish: async (content, { traceId, observationId }) => { + onFinish: async (content, { traceId, observationId, toolCalls }) => { stopAnimation(); // if there is traceId, update it if (traceId) { @@ -483,22 +456,27 @@ export const chatMessage: StateCreator< } // update the content after fetch result - await internal_updateMessageContent(assistantId, content); + await internal_updateMessageContent(assistantId, content, toolCalls); }, - onMessageHandle: async (text) => { - output += text; - outputQueue.push(...text.split('')); - - // is this message is just a function call - if (isFunctionMessageAtStart(output)) { - stopAnimation(); - internal_dispatchMessage({ - id: assistantId, - key: 'content', - type: 'updateMessage', - value: output, - }); - isFunctionCall = true; + onMessageHandle: async (chunk) => { + switch (chunk.type) { + case 'text': { + outputQueue.push(...chunk.text.split('')); + break; + } + + // is this message is just a tool call + case 'tool_calls': { + internal_dispatchMessage({ + id: assistantId, + type: 'updateMessages', + value: { + tool_calls: chunk.tool_calls, + tools: get().internal_transformToolCalls(chunk.tool_calls), + }, + }); + isFunctionCall = true; + } } // if it's the first time to receive the message, @@ -510,23 +488,7 @@ export const chatMessage: StateCreator< internal_toggleChatLoading(false, undefined, n('generateMessage(end)') as string); - // also exist message like this: - // 请稍等,我帮您查询一下。{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]} - if (!isFunctionCall) { - const { content, valid } = testFunctionMessageAtEnd(output); - - // if fc at end, replace the message - if (valid) { - isFunctionCall = true; - functionCallAtEnd = true; - functionCallContent = content; - } - } - return { - content: output, - functionCallAtEnd, - functionCallContent, 
isFunctionCall, traceId: msgTraceId, }; @@ -610,15 +572,27 @@ export const chatMessage: StateCreator< await internal_coreProcessMessage(contextMessages, latestMsg.id, { traceId }); }, - internal_updateMessageContent: async (id, content) => { - const { internal_dispatchMessage, refreshMessages } = get(); + internal_updateMessageContent: async (id, content, toolCalls) => { + const { internal_dispatchMessage, refreshMessages, internal_transformToolCalls } = get(); // Due to the async update method and refresh need about 100ms // we need to update the message content at the frontend to avoid the update flick // refs: https://medium.com/@kyledeguzmanx/what-are-optimistic-updates-483662c3e171 - internal_dispatchMessage({ id, key: 'content', type: 'updateMessage', value: content }); + if (toolCalls) { + internal_dispatchMessage({ + id, + type: 'updateMessages', + value: { tool_calls: toolCalls, tools: internal_transformToolCalls(toolCalls) }, + }); + } else { + internal_dispatchMessage({ id, type: 'updateMessages', value: { content } }); + } - await messageService.updateMessage(id, { content }); + await messageService.updateMessage(id, { + content, + tool_calls: toolCalls, + tools: toolCalls ? internal_transformToolCalls(toolCalls) : undefined, + }); await refreshMessages(); }, @@ -685,7 +659,7 @@ export const chatMessage: StateCreator< buffer += charsToAdd; // 更新消息内容,这里可能需要结合实际情况调整 - internal_dispatchMessage({ id, key: 'content', type: 'updateMessage', value: buffer }); + internal_dispatchMessage({ id, type: 'updateMessages', value: { content: buffer } }); // 设置下一个字符的延迟 animationTimeoutId = setTimeout(updateText, 16); // 16 毫秒的延迟模拟打字机效果 diff --git a/src/store/chat/slices/message/reducer.ts b/src/store/chat/slices/message/reducer.ts index ee75d0436b31..0310e4e9df8e 100644 --- a/src/store/chat/slices/message/reducer.ts +++ b/src/store/chat/slices/message/reducer.ts @@ -11,6 +11,13 @@ interface UpdateMessage { type: 'updateMessage'; value: ChatMessage[keyof ChatMessage]; } + +interface UpdateMessages { + id: string; + type: 'updateMessages'; + value: Partial; +} + interface CreateMessage { id: string; type: 'createMessage'; @@ -37,6 +44,7 @@ interface UpdateMessageExtra { export type MessageDispatch = | CreateMessage | UpdateMessage + | UpdateMessages | UpdatePluginState | UpdateMessageExtra | DeleteMessage; @@ -54,6 +62,15 @@ export const messagesReducer = (state: ChatMessage[], payload: MessageDispatch): message.updatedAt = Date.now(); }); } + case 'updateMessages': { + return produce(state, (draftState) => { + const { id, value } = payload; + const index = draftState.findIndex((i) => i.id === id); + if (index < 0) return; + + draftState[index] = merge(draftState[index], { ...value, updatedAt: Date.now() }); + }); + } case 'updateMessageExtra': { return produce(state, (draftState) => { @@ -67,7 +84,7 @@ export const messagesReducer = (state: ChatMessage[], payload: MessageDispatch): message.extra[key] = value; } - message.updateAt = Date.now(); + message.updatedAt = Date.now(); }); } diff --git a/src/store/chat/slices/message/selectors.ts b/src/store/chat/slices/message/selectors.ts index 91307800179e..20bb131b8c13 100644 --- a/src/store/chat/slices/message/selectors.ts +++ b/src/store/chat/slices/message/selectors.ts @@ -28,17 +28,9 @@ const getMeta = (message: ChatMessage) => { return message.meta; } - case 'assistant': { + default: { return sessionMetaSelectors.currentAgentMeta(useSessionStore.getState()); } - - case 'function': { - // TODO: 后续改成将 plugin metadata 写入 message metadata 的方案 - 
return { - avatar: '🧩', - title: 'plugin-unknown', - }; - } } }; diff --git a/src/store/chat/slices/plugin/action.test.ts b/src/store/chat/slices/plugin/action.test.ts index cb7c2647251e..35c6d3cbc26d 100644 --- a/src/store/chat/slices/plugin/action.test.ts +++ b/src/store/chat/slices/plugin/action.test.ts @@ -8,7 +8,7 @@ import { messageService } from '@/services/message'; import { chatSelectors } from '@/store/chat/selectors'; import { useChatStore } from '@/store/chat/store'; import { useToolStore } from '@/store/tool'; -import { ChatPluginPayload } from '@/types/message'; +import { ChatToolPayload } from '@/types/message'; import { LobeTool } from '@/types/tool'; const invokeStandaloneTypePlugin = useChatStore.getState().invokeStandaloneTypePlugin; @@ -172,7 +172,7 @@ describe('ChatPluginAction', () => { }); }); - describe('triggerFunctionCall', () => { + describe('triggerToolCalls', () => { it('should trigger a function call and update the plugin message accordingly', async () => { const messageId = 'message-id'; const messageContent = JSON.stringify({ @@ -207,7 +207,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); expect(chatSelectors.getMessageById).toHaveBeenCalledWith(messageId); @@ -255,7 +255,7 @@ describe('ChatPluginAction', () => { vi.spyOn(result.current, 'refreshMessages'); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); expect(result.current.refreshMessages).toHaveBeenCalled(); @@ -308,7 +308,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); // 验证 refreshMessages 是否被调用 @@ -349,7 +349,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); // 验证 refreshMessages 是否被调用 @@ -393,7 +393,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); // 验证 refreshMessages 是否被调用 @@ -510,7 +510,7 @@ describe('ChatPluginAction', () => { const payload = { apiName: 'text2image', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const toolResponse = JSON.stringify({ abc: 'data' }); @@ -557,7 +557,7 @@ describe('ChatPluginAction', () => { const payload = { apiName: 'text2image', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const toolResponse = 'Builtin tool response'; @@ -606,7 +606,7 @@ describe('ChatPluginAction', () => { const payload = { apiName: 'builtinApi', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const error = new Error('Builtin tool failed'); @@ -652,7 +652,7 @@ describe('ChatPluginAction', () => { identifier: 'abc', type: 'markdown', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const 
runPluginApiMock = vi.fn(); @@ -678,7 +678,7 @@ describe('ChatPluginAction', () => { const payload = { identifier: 'pluginName', - } as ChatPluginPayload; + } as ChatToolPayload; act(() => { useToolStore.setState({ diff --git a/src/store/chat/slices/plugin/action.ts b/src/store/chat/slices/plugin/action.ts index 4715b3938538..f928d5b6a2f4 100644 --- a/src/store/chat/slices/plugin/action.ts +++ b/src/store/chat/slices/plugin/action.ts @@ -3,14 +3,14 @@ import { t } from 'i18next'; import { Md5 } from 'ts-md5'; import { StateCreator } from 'zustand/vanilla'; +import { LOADING_FLAT } from '@/const/message'; import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; import { chatService } from '@/services/chat'; import { CreateMessageParams, messageService } from '@/services/message'; import { ChatStore } from '@/store/chat/store'; import { useToolStore } from '@/store/tool'; import { pluginSelectors } from '@/store/tool/selectors'; -import { ChatPluginPayload } from '@/types/message'; -import { OpenAIToolCall } from '@/types/openai/functionCall'; +import { ChatToolPayload, MessageToolCall } from '@/types/message'; import { setNamespace } from '@/utils/storeDebug'; import { chatSelectors } from '../../slices/message/selectors'; @@ -24,13 +24,15 @@ export interface ChatPluginAction { content: string, triggerAiMessage?: boolean, ) => Promise; - invokeBuiltinTool: (id: string, payload: ChatPluginPayload) => Promise; - invokeDefaultTypePlugin: (id: string, payload: any) => Promise; - invokeMarkdownTypePlugin: (id: string, payload: ChatPluginPayload) => Promise; - invokeStandaloneTypePlugin: (id: string, payload: ChatPluginPayload) => Promise; - runPluginApi: (id: string, payload: ChatPluginPayload) => Promise; + internal_transformToolCalls: (toolCalls: MessageToolCall[]) => ChatToolPayload[]; + invokeBuiltinTool: (id: string, payload: ChatToolPayload) => Promise; + invokeDefaultTypePlugin: (id: string, payload: any) => Promise; + invokeMarkdownTypePlugin: (id: string, payload: ChatToolPayload) => Promise; + invokeStandaloneTypePlugin: (id: string, payload: ChatToolPayload) => Promise; + runPluginApi: (id: string, payload: ChatToolPayload) => Promise; triggerAIMessage: (id: string, traceId?: string) => Promise; - triggerFunctionCall: (id: string) => Promise; + triggerToolCalls: (id: string) => Promise; + updatePluginState: (id: string, key: string, value: any) => Promise; } @@ -61,6 +63,35 @@ export const chatPlugin: StateCreator< if (triggerAiMessage) await triggerAIMessage(id); }, + internal_transformToolCalls: (toolCalls) => { + return toolCalls.map((toolCall) => { + let payload: ChatToolPayload; + + const [identifier, apiName, type] = toolCall.function.name.split(PLUGIN_SCHEMA_SEPARATOR); + + payload = { + apiName, + arguments: toolCall.function.arguments, + id: toolCall.id, + identifier, + type: (type ?? 
'default') as any, + }; + + // if the apiName is md5, try to find the correct apiName in the plugins + if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { + const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); + const manifest = pluginSelectors.getPluginManifestById(identifier)(useToolStore.getState()); + + const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); + if (api) { + payload.apiName = api.name; + } + } + + return payload; + }); + }, + invokeBuiltinTool: async (id, payload) => { const { internal_toggleChatLoading, internal_updateMessageContent } = get(); const params = JSON.parse(payload.arguments); @@ -94,14 +125,13 @@ export const chatPlugin: StateCreator< }, invokeDefaultTypePlugin: async (id, payload) => { - const { runPluginApi, triggerAIMessage } = get(); + const { runPluginApi } = get(); const data = await runPluginApi(id, payload); if (!data) return; - const traceId = chatSelectors.getTraceIdByMessageId(id)(get()); - await triggerAIMessage(id, traceId); + return data; }, invokeMarkdownTypePlugin: async (id, payload) => { @@ -181,89 +211,66 @@ export const chatPlugin: StateCreator< await internal_coreProcessMessage(chats, id, { traceId }); }, - triggerFunctionCall: async (id) => { - const message = chatSelectors.getMessageById(id)(get()); - if (!message) return; + triggerToolCalls: async (assistantId) => { + const message = chatSelectors.getMessageById(assistantId)(get()); + if (!message || !message.tools) return; const { invokeDefaultTypePlugin, invokeMarkdownTypePlugin, invokeStandaloneTypePlugin, invokeBuiltinTool, - refreshMessages, - internal_resendMessage, - deleteMessage, + triggerAIMessage, } = get(); - let payload = { apiName: '', identifier: '' } as ChatPluginPayload; - - // 识别到内容是 function_call 的情况下 - // 将 function_call 转换为 plugin request payload - if (message.content) { - const { tool_calls } = JSON.parse(message.content) as { - tool_calls: OpenAIToolCall[]; - }; - - const function_call = tool_calls[0].function; - - const [identifier, apiName, type] = function_call.name.split(PLUGIN_SCHEMA_SEPARATOR); - - payload = { - apiName, - arguments: function_call.arguments, - identifier, - type: (type ?? 
'default') as any, + let shouldCreateMessage = false; + let latestToolId = ''; + const messagePools = message.tools.map(async (payload) => { + const toolMessage: CreateMessageParams = { + content: LOADING_FLAT, + parentId: assistantId, + role: 'tool', + sessionId: get().activeId, + tool: payload, + topicId: get().activeTopicId, // if there is activeTopicId,then add it to topicId }; - // fix https://github.com/lobehub/lobe-chat/issues/1094, remove and retry after experiencing plugin illusion - if (!apiName) { - internal_resendMessage(id); - deleteMessage(id); - return; - } - - // if the apiName is md5, try to find the correct apiName in the plugins - if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { - const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); - const manifest = pluginSelectors.getPluginManifestById(identifier)(useToolStore.getState()); - - const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); - if (!api) return; - payload.apiName = api.name; + const id = await get().internal_createMessage(toolMessage); + + switch (payload.type) { + case 'standalone': { + await invokeStandaloneTypePlugin(id, payload); + break; + } + + case 'markdown': { + await invokeMarkdownTypePlugin(id, payload); + break; + } + + case 'builtin': { + await invokeBuiltinTool(id, payload); + break; + } + + default: { + const data = await invokeDefaultTypePlugin(id, payload); + if (data) { + shouldCreateMessage = true; + latestToolId = id; + } + } } - } else { - if (message.plugin) payload = message.plugin; - } - - if (!payload.apiName) return; - - await messageService.updateMessage(id, { - content: !!message.content ? '' : undefined, - plugin: payload, - role: 'function', }); - await refreshMessages(); - switch (payload.type) { - case 'standalone': { - await invokeStandaloneTypePlugin(id, payload); - break; - } + await Promise.all(messagePools); - case 'markdown': { - await invokeMarkdownTypePlugin(id, payload); - break; - } + // only default type tool calls should trigger AI message + if (!shouldCreateMessage) return; - case 'builtin': { - await invokeBuiltinTool(id, payload); - break; - } + const traceId = chatSelectors.getTraceIdByMessageId(latestToolId)(get()); - default: { - await invokeDefaultTypePlugin(id, payload); - } - } + await triggerAIMessage(latestToolId, traceId); }, updatePluginState: async (id, key, value) => { diff --git a/src/types/llm.ts b/src/types/llm.ts index 81bfac74c760..2e9d87ef3755 100644 --- a/src/types/llm.ts +++ b/src/types/llm.ts @@ -75,7 +75,7 @@ export interface LLMParams { top_p?: number; } -export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function'; +export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function' | 'tool'; export interface LLMMessage { content: string; diff --git a/src/types/message/index.ts b/src/types/message/index.ts index 0b8b5a1e7a01..c0bed2b61f99 100644 --- a/src/types/message/index.ts +++ b/src/types/message/index.ts @@ -5,7 +5,7 @@ import { ErrorType } from '@/types/fetch'; import { LLMRoleType } from '../llm'; import { BaseDataModel } from '../meta'; -import { ChatPluginPayload } from './tools'; +import { ChatPluginPayload, ChatToolPayload, MessageToolCall } from './tools'; import { Translate } from './translate'; /** @@ -43,6 +43,10 @@ export interface ChatMessage extends BaseDataModel { } & Record; files?: string[]; + /** + * only used in tool calling + */ + name?: string; /** * observation id */ @@ -51,9 +55,12 @@ export interface ChatMessage extends BaseDataModel { * parent message id */ 
parentId?: string; + /** + * @deprecated + */ plugin?: ChatPluginPayload; - pluginState?: any; + /** * quoted other message's id */ @@ -64,6 +71,10 @@ export interface ChatMessage extends BaseDataModel { role: LLMRoleType; sessionId?: string; + tool?: ChatToolPayload; + tool_calls?: MessageToolCall[]; + tools?: ChatToolPayload[]; + /** * 保存到主题的消息 */ diff --git a/src/types/message/tools.ts b/src/types/message/tools.ts index 39a7cb92fb52..ecca26af6a15 100644 --- a/src/types/message/tools.ts +++ b/src/types/message/tools.ts @@ -1,3 +1,6 @@ +import { DeepPartial } from 'utility-types'; +import { z } from 'zod'; + import { LobeToolRenderType } from '@/types/tool'; export interface ChatPluginPayload { @@ -6,3 +9,57 @@ export interface ChatPluginPayload { identifier: string; type: LobeToolRenderType; } + +export interface ChatToolPayload { + apiName: string; + arguments: string; + id: string; + identifier: string; + type: LobeToolRenderType; +} + +/** + * The function that the model called. + */ +export interface ToolFunction { + /** + * The arguments to call the function with, as generated by the model in JSON + * format. Note that the model does not always generate valid JSON, and may + * hallucinate parameters not defined by your function schema. Validate the + * arguments in your code before calling your function. + */ + arguments: string; + + /** + * The name of the function to call. + */ + name: string; +} + +export interface MessageToolCall { + /** + * The function that the model called. + */ + function: ToolFunction; + + /** + * The ID of the tool call. + */ + id: string; + + /** + * The type of the tool. Currently, only `function` is supported. + */ + type: 'function' | string; +} + +export type MessageToolCallChunk = DeepPartial & { index: number }; + +export const MessageToolCallSchema = z.object({ + function: z.object({ + arguments: z.string(), + name: z.string(), + }), + id: z.string(), + type: z.string(), +}); diff --git a/src/types/openai/chat.ts b/src/types/openai/chat.ts index e56d9ad48c0d..7e4c9bd7fd08 100644 --- a/src/types/openai/chat.ts +++ b/src/types/openai/chat.ts @@ -1,4 +1,5 @@ import { LLMRoleType } from '@/types/llm'; +import { MessageToolCall } from '@/types/message'; import { OpenAIFunctionCall } from './functionCall'; @@ -23,6 +24,9 @@ export interface OpenAIChatMessage { */ content: string | UserMessageContentPart[]; + /** + * @deprecated + */ function_call?: OpenAIFunctionCall; name?: string; /** @@ -30,6 +34,8 @@ export interface OpenAIChatMessage { * @description 消息发送者的角色 */ role: LLMRoleType; + tool_call_id?: string; + tool_calls?: MessageToolCall[]; } /** diff --git a/src/utils/fetch.test.ts b/src/utils/fetch.test.ts index 57187b283e62..954c08551987 100644 --- a/src/utils/fetch.test.ts +++ b/src/utils/fetch.test.ts @@ -2,7 +2,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import { ErrorResponse } from '@/types/fetch'; -import { getMessageError } from './fetch'; +import { getMessageError, parseToolCalls } from './fetch'; // 模拟 i18next vi.mock('i18next', () => ({ @@ -77,3 +77,94 @@ describe('getMessageError', () => { expect(mockResponse.json).toHaveBeenCalled(); }); }); + +describe('parseToolCalls', () => { + it('should create add new item', () => { + const chunk = [ + { index: 0, id: '1', type: 'function', function: { name: 'func', arguments: '' } }, + ]; + + const result = parseToolCalls([], chunk); + expect(result).toEqual([ + { id: '1', type: 'function', function: { name: 'func', arguments: '' } }, + ]); + }); + + it('should update 
arguments if there is a toolCall', () => { + const origin = [{ id: '1', type: 'function', function: { name: 'func', arguments: '' } }]; + + const chunk1 = [{ index: 0, function: { arguments: '{"lo' } }]; + + const result1 = parseToolCalls(origin, chunk1); + expect(result1).toEqual([ + { id: '1', type: 'function', function: { name: 'func', arguments: '{"lo' } }, + ]); + + const chunk2 = [{ index: 0, function: { arguments: 'cation\\": \\"Hangzhou\\"}' } }]; + const result2 = parseToolCalls(result1, chunk2); + + expect(result2).toEqual([ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + ]); + }); + + it('should add a new tool call if the index is different', () => { + const origin = [ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + ]; + + const chunk = [ + { + index: 1, + id: '2', + type: 'function', + function: { name: 'func', arguments: '' }, + }, + ]; + + const result1 = parseToolCalls(origin, chunk); + expect(result1).toEqual([ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + { id: '2', type: 'function', function: { name: 'func', arguments: '' } }, + ]); + }); + + it('should update correct arguments if there are multi tool calls', () => { + const origin = [ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + { id: '2', type: 'function', function: { name: 'func', arguments: '' } }, + ]; + + const chunk = [{ index: 1, function: { arguments: '{"location\\": \\"Beijing\\"}' } }]; + + const result1 = parseToolCalls(origin, chunk); + expect(result1).toEqual([ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + { + id: '2', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Beijing\\"}' }, + }, + ]); + }); +}); diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts index dcf549dbb72f..5ffc062cc990 100644 --- a/src/utils/fetch.ts +++ b/src/utils/fetch.ts @@ -1,8 +1,15 @@ +import { fetchEventSource } from '@microsoft/fetch-event-source'; import { t } from 'i18next'; +import { produce } from 'immer'; import { LOBE_CHAT_OBSERVATION_ID, LOBE_CHAT_TRACE_ID } from '@/const/trace'; import { ErrorResponse, ErrorType } from '@/types/fetch'; -import { ChatMessageError } from '@/types/message'; +import { + ChatMessageError, + MessageToolCall, + MessageToolCallChunk, + MessageToolCallSchema, +} from '@/types/message'; export const getMessageError = async (response: Response) => { let chatMessageError: ChatMessageError; @@ -32,68 +39,115 @@ export type OnFinishHandler = ( text: string, context: { observationId?: string | null; + toolCalls?: MessageToolCall[]; traceId?: string | null; type?: SSEFinishType; }, ) => Promise; +interface MessageTextChunk { + text: string; + type: 'text'; +} + +interface MessageToolCallsChunk { + tool_calls: MessageToolCall[]; + type: 'tool_calls'; +} + export interface FetchSSEOptions { onAbort?: (text: string) => Promise; onErrorHandle?: (error: ChatMessageError) => void; onFinish?: OnFinishHandler; - onMessageHandle?: (text: string) => void; + onMessageHandle?: (chunk: MessageTextChunk | MessageToolCallsChunk) => void; } +export const parseToolCalls = (origin: MessageToolCall[], value: MessageToolCallChunk[]) => + produce(origin, (draft) => { + if (draft.length === 0) { + draft.push(...value.map((item) => 
MessageToolCallSchema.parse(item))); + } else { + value.forEach(({ index, ...item }) => { + if (!draft?.[index]) { + draft?.splice(index, 0, MessageToolCallSchema.parse(item)); + } else { + if (item.function?.arguments) { + draft[index].function.arguments += item.function.arguments; + } + } + }); + } + }); + /** * Fetch data using stream method */ -export const fetchSSE = async (fetchFn: () => Promise, options: FetchSSEOptions = {}) => { - const response = await fetchFn(); - - // 如果不 ok 说明有请求错误 - if (!response.ok) { - const chatMessageError = await getMessageError(response); - - options.onErrorHandle?.(chatMessageError); - return; - } - - const returnRes = response.clone(); - - const data = response.body; - - if (!data) return; +// eslint-disable-next-line no-undef +export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptions = {}) => { let output = ''; - const reader = data.getReader(); - const decoder = new TextDecoder(); + let toolCalls: undefined | MessageToolCall[]; - let done = false; let finishedType: SSEFinishType = 'done'; + let response!: Response; - while (!done) { - try { - const { value, done: doneReading } = await reader.read(); - done = doneReading; - const chunkValue = decoder.decode(value, { stream: true }); - - output += chunkValue; - options.onMessageHandle?.(chunkValue); - } catch (error) { - done = true; - - if ((error as TypeError).name === 'AbortError') { - finishedType = 'abort'; - options?.onAbort?.(output); - } else { - finishedType = 'error'; - console.error(error); - } - } - } + try { + await fetchEventSource(url, { + body: options.body, + headers: options.headers as Record, + method: options.method, + onerror: (error) => { + if ((error as TypeError).name === 'AbortError') { + finishedType = 'abort'; + options?.onAbort?.(output); + } else { + finishedType = 'error'; + console.error(error); + } + throw new Error('Fetch error'); + // options.onErrorHandle() + }, + onmessage: (ev) => { + const data = JSON.parse(ev.data); + switch (ev.event) { + case 'text': { + output += data; + options.onMessageHandle?.({ text: data, type: 'text' }); + break; + } + + case 'tool_calls': { + if (!toolCalls) { + toolCalls = []; + } + + toolCalls = parseToolCalls(toolCalls, data); + + options.onMessageHandle?.({ + tool_calls: toolCalls, + type: 'tool_calls', + }); + } + } + }, + onopen: async (res) => { + response = res.clone(); + + // 如果不 ok 说明有请求错误 + if (!response.ok) { + const chatMessageError = await getMessageError(res); + + options.onErrorHandle?.(chatMessageError); + return; + } + }, + + signal: options.signal, + }); + } catch {} const traceId = response.headers.get(LOBE_CHAT_TRACE_ID); const observationId = response.headers.get(LOBE_CHAT_OBSERVATION_ID); - await options?.onFinish?.(output, { observationId, traceId, type: finishedType }); + await options?.onFinish?.(output, { observationId, toolCalls, traceId, type: finishedType }); - return returnRes; + return response; }; From 24543c1b32b82765ae175913c22f9a69e8a9c77e Mon Sep 17 00:00:00 2001 From: arvinxx Date: Wed, 8 May 2024 00:07:32 +0800 Subject: [PATCH 02/24] =?UTF-8?q?=F0=9F=92=84=20style:=20design=20the=20To?= =?UTF-8?q?ol=20Call=20UI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/database/client/schemas/message.ts | 4 +- src/features/Conversation/Actions/Tool.tsx | 16 +++ src/features/Conversation/Actions/index.ts | 3 +- .../Messages/Assistant/ToolCalls/index.tsx | 84 +++++++------ .../Conversation/Messages/Assistant/index.tsx | 17 ++- 
.../Conversation/Messages/Default.tsx | 5 +- .../Conversation/Messages/Function.tsx | 35 ------ .../Tool}/Inspector/PluginResultJSON.tsx | 0 .../ToolCalls => Tool/Inspector}/Settings.tsx | 0 .../Tool}/Inspector/index.tsx | 69 ++++++----- .../Tool}/Inspector/style.ts | 0 .../Conversation/Messages/Tool/index.tsx | 41 ++++--- src/features/Conversation/Messages/index.ts | 3 +- .../Plugins/Inspector/Settings.tsx | 39 ------ .../Plugins/Render/StandaloneType/Iframe.tsx | 2 +- .../components/ChatItem/index.tsx | 4 - .../Conversation/components/SkeletonList.tsx | 4 +- src/features/Conversation/index.tsx | 5 +- src/locales/default/plugin.ts | 5 +- src/services/__tests__/chat.test.ts | 2 +- src/services/chat.ts | 70 +++++++---- src/store/chat/slices/message/action.test.ts | 111 ++++++------------ src/store/chat/slices/message/action.ts | 33 ++++-- .../chat/slices/message/selectors.test.ts | 106 ++++++----------- src/store/chat/slices/message/selectors.ts | 13 -- src/store/chat/slices/plugin/action.test.ts | 35 ++++-- src/store/chat/slices/plugin/action.ts | 67 ++++++----- src/store/chat/slices/topic/action.ts | 9 +- src/store/tool/selectors/tool.ts | 30 +---- src/types/message/index.ts | 9 +- src/utils/fetch.ts | 29 ++++- src/utils/toolCall.ts | 21 ++++ 32 files changed, 414 insertions(+), 457 deletions(-) create mode 100644 src/features/Conversation/Actions/Tool.tsx delete mode 100644 src/features/Conversation/Messages/Function.tsx rename src/features/Conversation/{Plugins => Messages/Tool}/Inspector/PluginResultJSON.tsx (100%) rename src/features/Conversation/Messages/{Assistant/ToolCalls => Tool/Inspector}/Settings.tsx (100%) rename src/features/Conversation/{Plugins => Messages/Tool}/Inspector/index.tsx (73%) rename src/features/Conversation/{Plugins => Messages/Tool}/Inspector/style.ts (100%) delete mode 100644 src/features/Conversation/Plugins/Inspector/Settings.tsx create mode 100644 src/utils/toolCall.ts diff --git a/src/database/client/schemas/message.ts b/src/database/client/schemas/message.ts index 288606953581..ba4d2cfe493b 100644 --- a/src/database/client/schemas/message.ts +++ b/src/database/client/schemas/message.ts @@ -1,7 +1,6 @@ /* eslint-disable sort-keys-fix/sort-keys-fix */ import { z } from 'zod'; -import { MessageToolCallSchema } from '@/types/message'; const TranslateSchema = z.object({ from: z.string().optional(), @@ -27,9 +26,8 @@ export const DB_MessageSchema = z.object({ favorite: z.number().int().min(0).max(1).optional(), error: z.any().optional(), - tool_calls: z.array(MessageToolCallSchema).optional(), tools: z.array(ToolCallSchema).optional(), - tool: ToolCallSchema.optional(), + tool_call_id: z.string().optional(), plugin: PluginSchema.optional(), pluginState: z.any().optional(), diff --git a/src/features/Conversation/Actions/Tool.tsx b/src/features/Conversation/Actions/Tool.tsx new file mode 100644 index 000000000000..d1f022c76b1a --- /dev/null +++ b/src/features/Conversation/Actions/Tool.tsx @@ -0,0 +1,16 @@ +import { memo } from 'react'; + +import { RenderAction } from '../types'; + +export const ToolActionsBar: RenderAction = memo(() => { + return undefined; + // const { regenerate } = useChatListActionsBar(); + // return ( + // + // ); +}); diff --git a/src/features/Conversation/Actions/index.ts b/src/features/Conversation/Actions/index.ts index 71feebab93b2..dfa380401829 100644 --- a/src/features/Conversation/Actions/index.ts +++ b/src/features/Conversation/Actions/index.ts @@ -9,13 +9,14 @@ import { OnActionsClick, RenderAction } from '../types'; import { 
AssistantActionsBar } from './Assistant'; import { DefaultActionsBar } from './Fallback'; import { FunctionActionsBar } from './Function'; +import { ToolActionsBar } from './Tool'; import { UserActionsBar } from './User'; export const renderActions: Record = { assistant: AssistantActionsBar, function: FunctionActionsBar, system: DefaultActionsBar, - tool: FunctionActionsBar, + tool: ToolActionsBar, user: UserActionsBar, }; diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx index 681778525e1f..980982f2ac99 100644 --- a/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx +++ b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx @@ -1,11 +1,12 @@ -import { Loading3QuartersOutlined } from '@ant-design/icons'; import { Avatar, Highlighter, Icon } from '@lobehub/ui'; import isEqual from 'fast-deep-equal'; -import { LucideChevronDown, LucideChevronUp, LucideToyBrick } from 'lucide-react'; +import { Loader2, LucideChevronDown, LucideChevronRight, LucideToyBrick } from 'lucide-react'; import { memo, useState } from 'react'; import { useTranslation } from 'react-i18next'; -import { Flexbox } from 'react-layout-kit'; +import { Center, Flexbox } from 'react-layout-kit'; +import { useChatStore } from '@/store/chat'; +import { chatSelectors } from '@/store/chat/slices/message/selectors'; import { pluginHelpers, useToolStore } from '@/store/tool'; import { toolSelectors } from '@/store/tool/selectors'; @@ -14,59 +15,64 @@ import { useStyles } from './style'; export interface InspectorProps { arguments?: string; identifier: string; - loading?: boolean; + messageId: string; } -const Inspector = memo(({ arguments: requestArgs = '{}', loading, identifier }) => { - const { t } = useTranslation('plugin'); - const { styles } = useStyles(); - const [open, setOpen] = useState(false); +const CallItem = memo( + ({ arguments: requestArgs = '{}', messageId, identifier }) => { + const { t } = useTranslation('plugin'); + const { styles } = useStyles(); + const [open, setOpen] = useState(false); + const loading = useChatStore(chatSelectors.isMessageGenerating(messageId)); - const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual); + const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual); - const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta); + const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta); - const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('plugins.loading'); + const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('unknownPlugin'); - const avatar = pluginAvatar ? ( - - ) : ( - - ); + const avatar = pluginAvatar ? ( + + ) : ( + + ); - let params; - try { - params = JSON.stringify(JSON.parse(requestArgs), null, 2); - } catch { - params = requestArgs; - } + let params; + try { + params = JSON.stringify(JSON.parse(requestArgs), null, 2); + } catch { + params = requestArgs; + } - return ( - - + return ( + { setOpen(!open); }} > - {loading ? ( -
- -
- ) : ( - avatar - )} - {pluginTitle} - + + {loading ? ( +
+ +
+ ) : ( + avatar + )} + {pluginTitle} +
+
+          {(open || loading) && <Highlighter language={'json'}>{params}</Highlighter>}
- {open && {params}} -
- ); -}); + ); + }, +); -export default Inspector; +export default CallItem; diff --git a/src/features/Conversation/Messages/Assistant/index.tsx b/src/features/Conversation/Messages/Assistant/index.tsx index d9ed3b78fb98..8f6afa3f4559 100644 --- a/src/features/Conversation/Messages/Assistant/index.tsx +++ b/src/features/Conversation/Messages/Assistant/index.tsx @@ -1,6 +1,7 @@ import { ReactNode, memo } from 'react'; import { Flexbox } from 'react-layout-kit'; +import { LOADING_FLAT } from '@/const/message'; import { useChatStore } from '@/store/chat'; import { chatSelectors } from '@/store/chat/selectors'; import { ChatMessage } from '@/types/message'; @@ -14,19 +15,29 @@ export const AssistantMessage = memo< } >(({ id, tools, content, ...props }) => { const editing = useChatStore(chatSelectors.isMessageEditing(id)); + const generating = useChatStore(chatSelectors.isMessageGenerating(id)); + + const isToolCallGenerating = generating && (content === LOADING_FLAT || !content) && !!tools; return ( {(content || editing) && ( - + )} - {!editing && ( + {!editing && tools && ( - {tools?.map((toolCall) => ( + {tools.map((toolCall) => ( ))} diff --git a/src/features/Conversation/Messages/Default.tsx b/src/features/Conversation/Messages/Default.tsx index 6d0cbe33fa04..bc62fc16c7d1 100644 --- a/src/features/Conversation/Messages/Default.tsx +++ b/src/features/Conversation/Messages/Default.tsx @@ -8,8 +8,11 @@ import BubblesLoading from '../components/BubblesLoading'; export const DefaultMessage = memo< ChatMessage & { editableContent: ReactNode; + isToolCallGenerating?: boolean; } ->(({ id, editableContent, content }) => { +>(({ id, editableContent, content, isToolCallGenerating }) => { + if (isToolCallGenerating) return; + if (content === LOADING_FLAT) return ; return
{editableContent}
; diff --git a/src/features/Conversation/Messages/Function.tsx b/src/features/Conversation/Messages/Function.tsx deleted file mode 100644 index ba90dba5e5f8..000000000000 --- a/src/features/Conversation/Messages/Function.tsx +++ /dev/null @@ -1,35 +0,0 @@ -import isEqual from 'fast-deep-equal'; -import { memo, useState } from 'react'; -import { Flexbox } from 'react-layout-kit'; - -import { useChatStore } from '@/store/chat'; -import { chatSelectors } from '@/store/chat/selectors'; -import { ChatMessage } from '@/types/message'; - -import Inspector from '../Plugins/Inspector'; -import PluginRender from '../Plugins/Render'; - -export const FunctionMessage = memo(({ id, content, plugin }) => { - const fcProps = useChatStore( - chatSelectors.getFunctionMessageProps({ content, id, plugin }), - isEqual, - ); - - const [showRender, setShow] = useState(true); - - return ( - - - {showRender && ( - - )} - - ); -}); diff --git a/src/features/Conversation/Plugins/Inspector/PluginResultJSON.tsx b/src/features/Conversation/Messages/Tool/Inspector/PluginResultJSON.tsx similarity index 100% rename from src/features/Conversation/Plugins/Inspector/PluginResultJSON.tsx rename to src/features/Conversation/Messages/Tool/Inspector/PluginResultJSON.tsx diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx b/src/features/Conversation/Messages/Tool/Inspector/Settings.tsx similarity index 100% rename from src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx rename to src/features/Conversation/Messages/Tool/Inspector/Settings.tsx diff --git a/src/features/Conversation/Plugins/Inspector/index.tsx b/src/features/Conversation/Messages/Tool/Inspector/index.tsx similarity index 73% rename from src/features/Conversation/Plugins/Inspector/index.tsx rename to src/features/Conversation/Messages/Tool/Inspector/index.tsx index 5eb9e5cc4bb3..9cf85dfcfbd5 100644 --- a/src/features/Conversation/Plugins/Inspector/index.tsx +++ b/src/features/Conversation/Messages/Tool/Inspector/index.tsx @@ -1,13 +1,12 @@ import { Loading3QuartersOutlined } from '@ant-design/icons'; -import { LobePluginType } from '@lobehub/chat-plugin-sdk'; -import { ActionIcon, Avatar, Highlighter, Icon } from '@lobehub/ui'; +import { ActionIcon, Avatar, Highlighter, Icon, Tag } from '@lobehub/ui'; import { Tabs } from 'antd'; import isEqual from 'fast-deep-equal'; import { LucideBug, LucideBugOff, LucideChevronDown, - LucideChevronUp, + LucideChevronRight, LucideToyBrick, } from 'lucide-react'; import { memo, useState } from 'react'; @@ -16,6 +15,7 @@ import { Flexbox } from 'react-layout-kit'; import { pluginHelpers, useToolStore } from '@/store/tool'; import { pluginSelectors, toolSelectors } from '@/store/tool/selectors'; +import { ChatPluginPayload } from '@/types/message'; import PluginResult from './PluginResultJSON'; import Settings from './Settings'; @@ -23,36 +23,34 @@ import { useStyles } from './style'; export interface InspectorProps { arguments?: string; - command?: any; content: string; - id?: string; + identifier?: string; loading?: boolean; + payload?: ChatPluginPayload; setShow?: (showRender: boolean) => void; showRender?: boolean; - type?: LobePluginType; } const Inspector = memo( ({ arguments: requestArgs = '{}', - command, + payload, showRender, loading, setShow, content, - id = 'unknown', - // type, + identifier = 'unknown', }) => { const { t } = useTranslation('plugin'); const { styles } = useStyles(); const [open, setOpen] = useState(false); - const pluginMeta = 
useToolStore(toolSelectors.getMetaById(id), isEqual); + const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual); - const showRightAction = useToolStore(pluginSelectors.isPluginHasUI(id)); + const showRightAction = useToolStore(pluginSelectors.isPluginHasUI(identifier)); const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta); - const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('plugins.loading'); + const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('unknownPlugin'); const avatar = pluginAvatar ? ( @@ -62,7 +60,7 @@ const Inspector = memo( let args, params; try { - args = JSON.stringify(command, null, 2); + args = JSON.stringify(payload, null, 2); params = JSON.stringify(JSON.parse(requestArgs), null, 2); } catch { args = ''; @@ -81,29 +79,30 @@ const Inspector = memo( setShow?.(!showRender); }} > - {loading ? ( -
- -
- ) : ( - avatar - )} - {pluginTitle} - {showRightAction && } -
- { - - {/*{type === 'standalone' && }*/} - { - setOpen(!open); - }} - title={t(open ? 'debug.off' : 'debug.on')} - /> - + + {loading ? ( +
+ +
+ ) : ( + avatar + )} +
{pluginTitle}
<Tag>{payload?.apiName}</Tag>
- } + {showRightAction && } +
+ + + { + setOpen(!open); + }} + title={t(open ? 'debug.off' : 'debug.on')} + /> + +
{open && ( (({ id, content, tool }) => { - const fcProps = useChatStore( - chatSelectors.getFunctionMessageProps({ content, id, plugin: tool }), - isEqual, - ); - - const [showRender, setShow] = useState(true); +export const ToolMessage = memo(({ id, content, plugin }) => { + const loading = useChatStore(chatSelectors.isMessageGenerating(id)); - if (content === LOADING_FLAT) return ; + const [showRender, setShow] = useState(plugin?.type !== 'default'); return ( - - {showRender && ( + + {showRender || loading ? ( + ) : ( + + {plugin?.arguments || ''} + )} ); diff --git a/src/features/Conversation/Messages/index.ts b/src/features/Conversation/Messages/index.ts index 7a0469eef0c9..2abb0fd138d8 100644 --- a/src/features/Conversation/Messages/index.ts +++ b/src/features/Conversation/Messages/index.ts @@ -6,14 +6,13 @@ import { sessionSelectors } from '@/store/session/selectors'; import { OnAvatarsClick, RenderMessage } from '../types'; import { AssistantMessage } from './Assistant'; import { DefaultMessage } from './Default'; -import { FunctionMessage } from './Function'; import { ToolMessage } from './Tool'; import { UserMessage } from './User'; export const renderMessages: Record = { assistant: AssistantMessage, default: DefaultMessage, - function: FunctionMessage, + function: DefaultMessage, tool: ToolMessage, user: UserMessage, }; diff --git a/src/features/Conversation/Plugins/Inspector/Settings.tsx b/src/features/Conversation/Plugins/Inspector/Settings.tsx deleted file mode 100644 index e9183dfe24cf..000000000000 --- a/src/features/Conversation/Plugins/Inspector/Settings.tsx +++ /dev/null @@ -1,39 +0,0 @@ -import { ActionIcon } from '@lobehub/ui'; -import { LucideSettings } from 'lucide-react'; -import { memo, useState } from 'react'; -import { useTranslation } from 'react-i18next'; - -import PluginDetailModal from '@/features/PluginDetailModal'; -import { pluginHelpers, useToolStore } from '@/store/tool'; -import { pluginSelectors } from '@/store/tool/selectors'; - -const Settings = memo<{ id: string }>(({ id }) => { - const item = useToolStore(pluginSelectors.getPluginManifestById(id)); - const [open, setOpen] = useState(false); - const { t } = useTranslation('plugin'); - const hasSettings = pluginHelpers.isSettingSchemaNonEmpty(item?.settings); - - return ( - hasSettings && ( - <> - { - setOpen(true); - }} - title={t('setting')} - /> - { - setOpen(false); - }} - open={open} - schema={item?.settings} - /> - - ) - ); -}); - -export default Settings; diff --git a/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx b/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx index 6f21a4a0d2bf..d2c9d86074c1 100644 --- a/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx +++ b/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx @@ -126,7 +126,7 @@ const IFrameRender = memo(({ url, id, payload, width = 600, h // we need to know which message to trigger if (messageId !== id) return; - triggerAIMessage(id); + triggerAIMessage({ parentId: id }); }); // when plugin want to create an assistant message diff --git a/src/features/Conversation/components/ChatItem/index.tsx b/src/features/Conversation/components/ChatItem/index.tsx index 89545ee06fea..30eecbf31835 100644 --- a/src/features/Conversation/components/ChatItem/index.tsx +++ b/src/features/Conversation/components/ChatItem/index.tsx @@ -56,10 +56,6 @@ const Item = memo(({ index, id }) => { }, isEqual); const historyLength = useChatStore((s) => chatSelectors.currentChats(s).length); - const 
[editing, toggleMessageEditing] = useChatStore((s) => [ - chatSelectors.isMessageEditing(id)(s), - s.toggleMessageEditing, - ]); const [isMessageLoading, generating, editing, toggleMessageEditing, updateMessageContent] = useChatStore((s) => [ diff --git a/src/features/Conversation/components/SkeletonList.tsx b/src/features/Conversation/components/SkeletonList.tsx index bf6058e9a39b..d3ec5e868d28 100644 --- a/src/features/Conversation/components/SkeletonList.tsx +++ b/src/features/Conversation/components/SkeletonList.tsx @@ -34,14 +34,14 @@ const SkeletonList = memo(({ mobile }) => { diff --git a/src/features/Conversation/index.tsx b/src/features/Conversation/index.tsx index 09d2c265ede3..fad20011729f 100644 --- a/src/features/Conversation/index.tsx +++ b/src/features/Conversation/index.tsx @@ -1,9 +1,8 @@ -import { Suspense, lazy } from 'react'; +import { Suspense } from 'react'; import { Flexbox } from 'react-layout-kit'; import SkeletonList from './components/SkeletonList'; - -const ChatList = lazy(() => import('./components/VirtualizedList')); +import ChatList from './components/VirtualizedList'; interface ConversationProps { mobile?: boolean; diff --git a/src/locales/default/plugin.ts b/src/locales/default/plugin.ts index cd8a58857183..65984041157d 100644 --- a/src/locales/default/plugin.ts +++ b/src/locales/default/plugin.ts @@ -131,10 +131,6 @@ export default { plugin: '插件运行中...', }, pluginList: '插件列表', - plugins: { - loading: '工具检测中...', - unknown: '未知工具', - }, setting: '插件设置', settings: { indexUrl: { @@ -166,4 +162,5 @@ export default { }, title: '插件商店', }, + unknownPlugin: '未知插件', }; diff --git a/src/services/__tests__/chat.test.ts b/src/services/__tests__/chat.test.ts index 15d0f9a665ad..b9f403533e2b 100644 --- a/src/services/__tests__/chat.test.ts +++ b/src/services/__tests__/chat.test.ts @@ -580,7 +580,7 @@ Get data from users`, body: JSON.stringify(expectedPayload), headers: expect.any(Object), method: 'POST', - signal: undefined, + signal: expect.any(AbortSignal), }); }); diff --git a/src/services/chat.ts b/src/services/chat.ts index 9af430a7f940..9db2af302bdf 100644 --- a/src/services/chat.ts +++ b/src/services/chat.ts @@ -21,10 +21,11 @@ import { userProfileSelectors, } from '@/store/user/selectors'; import { ChatErrorType } from '@/types/fetch'; -import { ChatMessage } from '@/types/message'; +import { ChatMessage, MessageToolCall } from '@/types/message'; import type { ChatStreamPayload, OpenAIChatMessage } from '@/types/openai/chat'; import { UserMessageContentPart } from '@/types/openai/chat'; import { FetchSSEOptions, fetchSSE, getMessageError } from '@/utils/fetch'; +import { genToolCallingName } from '@/utils/toolCall'; import { createTraceHeader, getTraceId } from '@/utils/trace'; import { createHeaderWithAuth, getProviderAuthPayload } from './_auth'; @@ -254,28 +255,33 @@ class ChatService { const enableFetchOnClient = modelConfigSelectors.isProviderFetchOnClient(provider)( useUserStore.getState(), ); - /** - * Notes: - * 1. Broswer agent runtime will skip auth check if a key and endpoint provided by - * user which will cause abuse of plugins services - * 2. 
This feature will disabled by default - */ + + let fetcher: typeof fetch | undefined = undefined; + if (enableFetchOnClient) { - try { - return await this.fetchOnClient({ payload, provider, signal }); - } catch (e) { - const { - errorType = ChatErrorType.BadRequest, - error: errorContent, - ...res - } = e as ChatCompletionErrorPayload; - - const error = errorContent || e; - // track the error at server side - console.error(`Route: [${provider}] ${errorType}:`, error); - - return createErrorResponse(errorType, { error, ...res, provider }); - } + /** + * Notes: + * 1. Browser agent runtime will skip auth check if a key and endpoint provided by + * user which will cause abuse of plugins services + * 2. This feature will be disabled by default + */ + fetcher = async () => { + try { + return await this.fetchOnClient({ payload, provider, signal }); + } catch (e) { + const { + errorType = ChatErrorType.BadRequest, + error: errorContent, + ...res + } = e as ChatCompletionErrorPayload; + + const error = errorContent || e; + // track the error at server side + console.error(`Route: [${provider}] ${errorType}:`, error); + + return createErrorResponse(errorType, { error, ...res, provider }); + } + }; } const traceHeader = createTraceHeader({ ...options?.trace }); @@ -287,6 +293,7 @@ class ChatService { return fetchSSE(API_ENDPOINTS.chat(provider), { body: JSON.stringify(payload), + fetcher: fetcher, headers, method: 'POST', onAbort: options?.onAbort, @@ -410,7 +417,20 @@ class ChatService { } case 'assistant': { - return { content: m.content, role: m.role, tool_calls: m.tool_calls }; + return { + content: m.content, + role: m.role, + tool_calls: m.tools?.map( + (tool): MessageToolCall => ({ + function: { + arguments: tool.arguments, + name: genToolCallingName(tool.identifier, tool.apiName, tool.type), + }, + id: tool.id, + type: tool.type, + }), + ), + }; } // TODO: need to be removed after upgrade @@ -422,9 +442,9 @@ class ChatService { case 'tool': { return { content: m.content, - name: m.tool_calls?.find((tool) => tool.id === m.tool?.id)?.function.name, + name: genToolCallingName(m.plugin!.identifier, m.plugin!.apiName, m.plugin?.type), role: m.role, - tool_call_id: m.tool?.id, + tool_call_id: m.tool_call_id, }; } diff --git a/src/store/chat/slices/message/action.test.ts b/src/store/chat/slices/message/action.test.ts index d6d6f634ac3d..3c76c23fbce3 100644 --- a/src/store/chat/slices/message/action.test.ts +++ b/src/store/chat/slices/message/action.test.ts @@ -19,6 +19,7 @@ vi.stubGlobal( vi.fn(() => Promise.resolve(new Response('mock'))), ); +vi.mock('zustand/traditional'); // Mock service vi.mock('@/services/message', () => ({ messageService: { @@ -47,12 +48,6 @@ vi.mock('@/services/chat', async (importOriginal) => { }; }); -vi.mock('@/store/chat/selectors', () => ({ - chatSelectors: { - currentChats: vi.fn(), - }, -})); - const realCoreProcessMessage = useChatStore.getState().internal_coreProcessMessage; const realRefreshMessages = useChatStore.getState().refreshMessages; // Mock state @@ -86,6 +81,9 @@ describe('chatMessage actions', () => { const messageId = 'message-id'; const deleteSpy = vi.spyOn(result.current, 'deleteMessage'); + act(() => { + useChatStore.setState({ messages: [{ id: messageId } as ChatMessage] }); + }); await act(async () => { await result.current.deleteMessage(messageId); }); @@ -259,11 +257,6 @@ describe('chatMessage actions', () => { enableAutoCreateTopic, })); - // Mock the currentChats selector to return a list that does not reach the threshold - 
(chatSelectors.currentChats as Mock).mockReturnValue( - Array.from({ length: autoCreateTopicThreshold + 1 }, (_, i) => ({ id: `msg-${i}` })), - ); - // Mock saveToTopic and switchTopic to simulate not being called const saveToTopicMock = vi.fn(); const switchTopicMock = vi.fn(); @@ -271,6 +264,10 @@ describe('chatMessage actions', () => { await act(async () => { useChatStore.setState({ ...mockState, + // Mock the currentChats selector to return a list that does not reach the threshold + messages: Array.from({ length: autoCreateTopicThreshold + 1 }, (_, i) => ({ + id: `msg-${i}`, + })) as any, activeTopicId: undefined, saveToTopic: saveToTopicMock, switchTopic: switchTopicMock, @@ -298,11 +295,6 @@ describe('chatMessage actions', () => { // Mock messageService.create to resolve with a message id (messageService.createMessage as Mock).mockResolvedValue('new-message-id'); - // Mock the currentChats selector to return a list that reaches the threshold - (chatSelectors.currentChats as Mock).mockReturnValue( - Array.from({ length: autoCreateTopicThreshold }, (_, i) => ({ id: `msg-${i}` })), - ); - // Mock saveToTopic to resolve with a topic id and switchTopic to switch to the new topic const saveToTopicMock = vi.fn(() => Promise.resolve('new-topic-id')); const switchTopicMock = vi.fn(); @@ -310,6 +302,9 @@ describe('chatMessage actions', () => { act(() => { useChatStore.setState({ ...mockState, + messages: Array.from({ length: autoCreateTopicThreshold }, (_, i) => ({ + id: `msg-${i}`, + })) as any, activeTopicId: undefined, saveToTopic: saveToTopicMock, switchTopic: switchTopicMock, @@ -339,11 +334,6 @@ describe('chatMessage actions', () => { enableAutoCreateTopic, })); - // Mock the currentChats selector to return a list that does not reach the threshold - (chatSelectors.currentChats as Mock).mockReturnValue( - Array.from({ length: autoCreateTopicThreshold - 1 }, (_, i) => ({ id: `msg-${i}` })), - ); - // Mock saveToTopic and switchTopic to simulate not being called const saveToTopicMock = vi.fn(); const switchTopicMock = vi.fn(); @@ -351,6 +341,10 @@ describe('chatMessage actions', () => { await act(async () => { useChatStore.setState({ ...mockState, + // Mock the currentChats selector to return a list that does not reach the threshold + messages: Array.from({ length: autoCreateTopicThreshold - 2 }, (_, i) => ({ + id: `msg-${i}`, + })) as any, activeTopicId: undefined, saveToTopic: saveToTopicMock, switchTopic: switchTopicMock, @@ -395,12 +389,14 @@ describe('chatMessage actions', () => { const { result } = renderHook(() => useChatStore()); const messageId = 'message-id'; - // Mock the currentChats selector to return a list that includes the message to be resent - (chatSelectors.currentChats as Mock).mockReturnValue([ - // ... other messages - { id: messageId, role: 'user', content: 'Resend this message' }, - // ... 
other messages - ]); + act(() => { + useChatStore.setState({ + // Mock the currentChats selector to return a list that includes the message to be resent + messages: [ + { id: messageId, role: 'user', content: 'Resend this message' } as ChatMessage, + ], + }); + }); // Mock the internal_coreProcessMessage function to resolve immediately mockState.internal_coreProcessMessage.mockResolvedValue(undefined); @@ -421,10 +417,12 @@ describe('chatMessage actions', () => { const { result } = renderHook(() => useChatStore()); const messageId = 'non-existing-message-id'; - // Mock the currentChats selector to return a list that does not include the message to be resent - (chatSelectors.currentChats as Mock).mockReturnValue([ - // ... other messages - ]); + act(() => { + useChatStore.setState({ + // Mock the currentChats selector to return a list that does not include the message to be resent + messages: [], + }); + }); await act(async () => { await result.current.internal_resendMessage(messageId); @@ -461,9 +459,8 @@ describe('chatMessage actions', () => { expect(internal_dispatchMessageSpy).toHaveBeenCalledWith({ id: messageId, - key: 'content', - type: 'updateMessage', - value: newContent, + type: 'updateMessages', + value: { content: newContent }, }); }); @@ -649,44 +646,6 @@ describe('chatMessage actions', () => { }); }); - it('should handle function call message at start of AI response', async () => { - const { result } = renderHook(() => useChatStore()); - const messages = [{ id: 'message-id', content: 'Hello', role: 'user' }] as ChatMessage[]; - const assistantMessageId = 'assistant-message-id'; - const aiResponse = - '{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]}'; - - // Mock fetch to resolve with AI response containing function call - vi.mocked(fetch).mockResolvedValueOnce(new Response(aiResponse)); - - await act(async () => { - const response = await result.current.internal_fetchAIChatMessage( - messages, - assistantMessageId, - ); - expect(response.isFunctionCall).toEqual(true); - }); - }); - - it('should handle function message at end of AI response', async () => { - const { result } = renderHook(() => useChatStore()); - const messages = [{ id: 'message-id', content: 'Hello', role: 'user' }] as ChatMessage[]; - const assistantMessageId = 'assistant-message-id'; - const aiResponse = - 'Hello, human! 
{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]}'; - - // Mock fetch to resolve with AI response containing function call at end - vi.mocked(fetch).mockResolvedValue(new Response(aiResponse)); - - await act(async () => { - const response = await result.current.internal_fetchAIChatMessage( - messages, - assistantMessageId, - ); - expect(response.isFunctionCall).toEqual(true); - }); - }); - it('should handle errors during AI response fetching', async () => { const { result } = renderHook(() => useChatStore()); const messages = [{ id: 'message-id', content: 'Hello', role: 'user' }] as ChatMessage[]; @@ -697,9 +656,11 @@ describe('chatMessage actions', () => { vi.mocked(fetch).mockRejectedValue(new Error(errorMessage)); await act(async () => { - await expect( - result.current.internal_fetchAIChatMessage(messages, assistantMessageId), - ).rejects.toThrow(errorMessage); + expect( + await result.current.internal_fetchAIChatMessage(messages, assistantMessageId), + ).toEqual({ + isFunctionCall: false, + }); }); }); }); diff --git a/src/store/chat/slices/message/action.ts b/src/store/chat/slices/message/action.ts index 1409db34a38d..1bae1ab27fef 100644 --- a/src/store/chat/slices/message/action.ts +++ b/src/store/chat/slices/message/action.ts @@ -157,8 +157,29 @@ export const chatMessage: StateCreator< ChatMessageAction > = (set, get) => ({ deleteMessage: async (id) => { - get().internal_dispatchMessage({ type: 'deleteMessage', id }); - await messageService.removeMessage(id); + const message = chatSelectors.getMessageById(id)(get()); + if (!message) return; + + const deleteFn = async (id: string) => { + get().internal_dispatchMessage({ type: 'deleteMessage', id }); + await messageService.removeMessage(id); + }; + + // if the message is a tool calls, then delete all the related messages + // TODO: maybe we need to delete it in the DB? + if (message.tools) { + const pools = message.tools + .flatMap((tool) => { + const messages = get().messages.filter((m) => m.tool_call_id === tool.id); + + return messages.map((m) => m.id); + }) + .map((i) => deleteFn(i)); + + await Promise.all(pools); + } + + await deleteFn(id); await get().refreshMessages(); }, delAndRegenerateMessage: async (id) => { @@ -470,10 +491,7 @@ export const chatMessage: StateCreator< internal_dispatchMessage({ id: assistantId, type: 'updateMessages', - value: { - tool_calls: chunk.tool_calls, - tools: get().internal_transformToolCalls(chunk.tool_calls), - }, + value: { tools: get().internal_transformToolCalls(chunk.tool_calls) }, }); isFunctionCall = true; } @@ -582,7 +600,7 @@ export const chatMessage: StateCreator< internal_dispatchMessage({ id, type: 'updateMessages', - value: { tool_calls: toolCalls, tools: internal_transformToolCalls(toolCalls) }, + value: { tools: internal_transformToolCalls(toolCalls) }, }); } else { internal_dispatchMessage({ id, type: 'updateMessages', value: { content } }); @@ -590,7 +608,6 @@ export const chatMessage: StateCreator< await messageService.updateMessage(id, { content, - tool_calls: toolCalls, tools: toolCalls ? 
internal_transformToolCalls(toolCalls) : undefined, }); await refreshMessages(); diff --git a/src/store/chat/slices/message/selectors.test.ts b/src/store/chat/slices/message/selectors.test.ts index 989f6a80afc2..a3019500d826 100644 --- a/src/store/chat/slices/message/selectors.test.ts +++ b/src/store/chat/slices/message/selectors.test.ts @@ -35,12 +35,16 @@ const mockMessages = [ { id: 'msg3', content: 'Function Message', - role: 'function', - plugin: { - arguments: ['arg1', 'arg2'], - identifier: 'func1', - type: 'pluginType', - }, + role: 'tool', + tools: [ + { + arguments: ['arg1', 'arg2'], + identifier: 'func1', + apiName: 'ttt', + type: 'pluginType', + id: 'abc', + }, + ], }, ] as ChatMessage[]; @@ -64,16 +68,22 @@ const mockedChats = [ { id: 'msg3', content: 'Function Message', - role: 'function', + role: 'tool', meta: { - avatar: '🧩', - title: 'plugin-unknown', - }, - plugin: { - arguments: ['arg1', 'arg2'], - identifier: 'func1', - type: 'pluginType', + avatar: '🤯', + backgroundColor: 'rgba(0,0,0,0)', + description: 'inbox.desc', + title: 'inbox.title', }, + tools: [ + { + arguments: ['arg1', 'arg2'], + identifier: 'func1', + apiName: 'ttt', + type: 'pluginType', + id: 'abc', + }, + ], }, ] as ChatMessage[]; @@ -103,52 +113,6 @@ describe('chatSelectors', () => { }); }); - describe('getFunctionMessageProps', () => { - it('should return the properties of a function message', () => { - const state = merge(initialStore, { - messages: mockMessages, - chatLoadingIds: ['msg3'], // Assuming this id represents a loading state - }); - const props = chatSelectors.getFunctionMessageProps(mockMessages[2])(state); - expect(props).toEqual({ - arguments: ['arg1', 'arg2'], - command: mockMessages[2].plugin, - content: 'Function Message', - id: 'func1', - loading: true, - type: 'pluginType', - }); - }); - - it('should return loading as false if the message id is not the current loading id', () => { - const state = merge(initialStore, { messages: mockMessages, chatLoadingId: 'msg1' }); - const props = chatSelectors.getFunctionMessageProps(mockMessages[2])(state); - expect(props.loading).toBe(false); - }); - - it('should return correct properties when no plugin is present', () => { - const messageWithoutPlugin = { - id: 'msg4', - content: 'No Plugin Message', - role: 'function', - // No plugin property - }; - const state = merge(initialStore, { - messages: [...mockMessages, messageWithoutPlugin], - chatLoadingId: 'msg1', - }); - const props = chatSelectors.getFunctionMessageProps(messageWithoutPlugin)(state); - expect(props).toEqual({ - arguments: undefined, - command: undefined, - content: 'No Plugin Message', - id: undefined, - loading: false, - type: undefined, - }); - }); - }); - describe('currentChatsWithHistoryConfig', () => { it('should slice the messages according to the current agent config', () => { const state = merge(initialStore, { messages: mockMessages }); @@ -185,16 +149,22 @@ describe('chatSelectors', () => { { id: 'msg3', content: 'Function Message', - role: 'function', + role: 'tool', meta: { - avatar: '🧩', - title: 'plugin-unknown', - }, - plugin: { - arguments: ['arg1', 'arg2'], - identifier: 'func1', - type: 'pluginType', + avatar: '🤯', + backgroundColor: 'rgba(0,0,0,0)', + description: 'inbox.desc', + title: 'inbox.title', }, + tools: [ + { + apiName: 'ttt', + arguments: ['arg1', 'arg2'], + identifier: 'func1', + id: 'abc', + type: 'pluginType', + }, + ], }, ]); }); diff --git a/src/store/chat/slices/message/selectors.ts b/src/store/chat/slices/message/selectors.ts index 
20bb131b8c13..44b829efeddd 100644 --- a/src/store/chat/slices/message/selectors.ts +++ b/src/store/chat/slices/message/selectors.ts @@ -1,4 +1,3 @@ -import { LobePluginType } from '@lobehub/chat-plugin-sdk'; import { t } from 'i18next'; import { DEFAULT_INBOX_AVATAR, DEFAULT_USER_AVATAR } from '@/const/meta'; @@ -108,17 +107,6 @@ const chatsMessageString = (s: ChatStore): string => { return chats.map((m) => m.content).join(''); }; -const getFunctionMessageProps = - ({ plugin, content, id }: Pick) => - (s: ChatStore) => ({ - arguments: plugin?.arguments, - command: plugin, - content, - id: plugin?.identifier, - loading: s.chatLoadingIds.includes(id), - type: plugin?.type as LobePluginType, - }); - const getMessageById = (id: string) => (s: ChatStore) => chatHelpers.getMessageById(s.messages, id); const getTraceIdByMessageId = (id: string) => (s: ChatStore) => getMessageById(id)(s)?.traceId; @@ -139,7 +127,6 @@ export const chatSelectors = { currentChats, currentChatsWithGuideMessage, currentChatsWithHistoryConfig, - getFunctionMessageProps, getMessageById, getTraceIdByMessageId, isAIGenerating, diff --git a/src/store/chat/slices/plugin/action.test.ts b/src/store/chat/slices/plugin/action.test.ts index 35c6d3cbc26d..627bf70286bb 100644 --- a/src/store/chat/slices/plugin/action.test.ts +++ b/src/store/chat/slices/plugin/action.test.ts @@ -8,11 +8,13 @@ import { messageService } from '@/services/message'; import { chatSelectors } from '@/store/chat/selectors'; import { useChatStore } from '@/store/chat/store'; import { useToolStore } from '@/store/tool'; -import { ChatToolPayload } from '@/types/message'; +import { ChatMessage, ChatToolPayload } from '@/types/message'; import { LobeTool } from '@/types/tool'; const invokeStandaloneTypePlugin = useChatStore.getState().invokeStandaloneTypePlugin; +vi.mock('zustand/traditional'); + // Mock messageService vi.mock('@/services/message', () => ({ messageService: { @@ -73,6 +75,7 @@ describe('ChatPluginAction', () => { const initialState = { messages: [], coreProcessMessage: vi.fn(), + internal_coreProcessMessage: vi.fn(), refreshMessages: vi.fn(), }; useChatStore.setState(initialState); @@ -130,7 +133,6 @@ describe('ChatPluginAction', () => { content: pluginApiResponse, }); expect(storeState.refreshMessages).toHaveBeenCalled(); - expect(storeState.triggerAIMessage).toHaveBeenCalled(); expect(storeState.internal_toggleChatLoading).toHaveBeenCalledWith( false, 'message-id', @@ -172,7 +174,7 @@ describe('ChatPluginAction', () => { }); }); - describe('triggerToolCalls', () => { + describe.skip('triggerToolCalls', () => { it('should trigger a function call and update the plugin message accordingly', async () => { const messageId = 'message-id'; const messageContent = JSON.stringify({ @@ -196,21 +198,32 @@ describe('ChatPluginAction', () => { const refreshSpy = vi.spyOn(useChatStore.getState(), 'refreshMessages'); const invokeSpy = vi.spyOn(useChatStore.getState(), 'invokeDefaultTypePlugin'); - vi.spyOn(chatSelectors, 'getMessageById').mockImplementationOnce( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); const { result } = renderHook(() => useChatStore()); await act(async () => { + useChatStore.setState({ + runPluginApi: vi.fn(), + messages: [ + { + id: messageId, + content: messageContent, + tools: [ + { + id: 'call_sbca', + type: 'default', + identifier: 'pluginName', + apiName: 'apiName', + arguments: "{ key: 'value' }", + }, + ], + } as ChatMessage, + ], + }); + await result.current.triggerToolCalls(messageId); }); - 
expect(chatSelectors.getMessageById).toHaveBeenCalledWith(messageId); expect(messageService.updateMessage).toHaveBeenCalledWith(messageId, { content: '', plugin: messagePluginPayload, diff --git a/src/store/chat/slices/plugin/action.ts b/src/store/chat/slices/plugin/action.ts index f928d5b6a2f4..6b5a57a879f2 100644 --- a/src/store/chat/slices/plugin/action.ts +++ b/src/store/chat/slices/plugin/action.ts @@ -30,7 +30,7 @@ export interface ChatPluginAction { invokeMarkdownTypePlugin: (id: string, payload: ChatToolPayload) => Promise; invokeStandaloneTypePlugin: (id: string, payload: ChatToolPayload) => Promise; runPluginApi: (id: string, payload: ChatToolPayload) => Promise; - triggerAIMessage: (id: string, traceId?: string) => Promise; + triggerAIMessage: (params: { parentId?: string; traceId?: string }) => Promise; triggerToolCalls: (id: string) => Promise; updatePluginState: (id: string, key: string, value: any) => Promise; @@ -60,36 +60,42 @@ export const chatPlugin: StateCreator< await internal_updateMessageContent(id, content); - if (triggerAiMessage) await triggerAIMessage(id); + if (triggerAiMessage) await triggerAIMessage({ parentId: id }); }, internal_transformToolCalls: (toolCalls) => { - return toolCalls.map((toolCall) => { - let payload: ChatToolPayload; - - const [identifier, apiName, type] = toolCall.function.name.split(PLUGIN_SCHEMA_SEPARATOR); - - payload = { - apiName, - arguments: toolCall.function.arguments, - id: toolCall.id, - identifier, - type: (type ?? 'default') as any, - }; - - // if the apiName is md5, try to find the correct apiName in the plugins - if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { - const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); - const manifest = pluginSelectors.getPluginManifestById(identifier)(useToolStore.getState()); - - const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); - if (api) { - payload.apiName = api.name; + return toolCalls + .map((toolCall): ChatToolPayload | null => { + let payload: ChatToolPayload; + + const [identifier, apiName, type] = toolCall.function.name.split(PLUGIN_SCHEMA_SEPARATOR); + + if (!apiName) return null; + + payload = { + apiName, + arguments: toolCall.function.arguments, + id: toolCall.id, + identifier, + type: (type ?? 'default') as any, + }; + + // if the apiName is md5, try to find the correct apiName in the plugins + if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { + const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); + const manifest = pluginSelectors.getPluginManifestById(identifier)( + useToolStore.getState(), + ); + + const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); + if (api) { + payload.apiName = api.name; + } } - } - return payload; - }); + return payload; + }) + .filter(Boolean) as ChatToolPayload[]; }, invokeBuiltinTool: async (id, payload) => { @@ -205,10 +211,10 @@ export const chatPlugin: StateCreator< return data; }, - triggerAIMessage: async (id, traceId) => { + triggerAIMessage: async ({ parentId, traceId }) => { const { internal_coreProcessMessage } = get(); const chats = chatSelectors.currentChats(get()); - await internal_coreProcessMessage(chats, id, { traceId }); + await internal_coreProcessMessage(chats, parentId ?? 
chats.at(-1)!.id, { traceId }); }, triggerToolCalls: async (assistantId) => { @@ -229,9 +235,10 @@ export const chatPlugin: StateCreator< const toolMessage: CreateMessageParams = { content: LOADING_FLAT, parentId: assistantId, + plugin: payload, role: 'tool', sessionId: get().activeId, - tool: payload, + tool_call_id: payload.id, topicId: get().activeTopicId, // if there is activeTopicId,then add it to topicId }; @@ -270,7 +277,7 @@ export const chatPlugin: StateCreator< const traceId = chatSelectors.getTraceIdByMessageId(latestToolId)(get()); - await triggerAIMessage(latestToolId, traceId); + await triggerAIMessage({ traceId }); }, updatePluginState: async (id, key, value) => { diff --git a/src/store/chat/slices/topic/action.ts b/src/store/chat/slices/topic/action.ts index 3c907705a1c5..9ddf1dce9660 100644 --- a/src/store/chat/slices/topic/action.ts +++ b/src/store/chat/slices/topic/action.ts @@ -134,8 +134,13 @@ export const chatTopic: StateCreator< onLoadingChange: (loading) => { internal_updateTopicLoading(topicId, loading); }, - onMessageHandle: (x) => { - output += x; + onMessageHandle: (chunk) => { + switch (chunk.type) { + case 'text': { + output += chunk.text; + } + } + updateTopicTitleInSummary(topicId, output); }, params: await chainSummaryTitle(messages), diff --git a/src/store/tool/selectors/tool.ts b/src/store/tool/selectors/tool.ts index b300547e8a70..bd4ae0f6e4a3 100644 --- a/src/store/tool/selectors/tool.ts +++ b/src/store/tool/selectors/tool.ts @@ -1,35 +1,16 @@ import { LobeChatPluginManifest } from '@lobehub/chat-plugin-sdk'; import { uniqBy } from 'lodash-es'; -import { Md5 } from 'ts-md5'; -import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; import { MetaData } from '@/types/meta'; import { ChatCompletionTool } from '@/types/openai/chat'; import { LobeToolMeta } from '@/types/tool/tool'; +import { genToolCallingName } from '@/utils/toolCall'; import { pluginHelpers } from '../helpers'; import { ToolStoreState } from '../initialState'; import { builtinToolSelectors } from '../slices/builtin/selectors'; import { pluginSelectors } from '../slices/plugin/selectors'; -const getAPIName = (identifier: string, name: string, type?: string) => { - const pluginType = type && type !== 'default' ? 
`${PLUGIN_SCHEMA_SEPARATOR + type}` : ''; - - // 将插件的 identifier 作为前缀,避免重复 - let apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + name + pluginType; - - // OpenAI GPT function_call name can't be longer than 64 characters - // So we need to use md5 to shorten the name - // and then find the correct apiName in response by md5 - if (apiName.length >= 64) { - const md5Content = PLUGIN_SCHEMA_API_MD5_PREFIX + Md5.hashStr(name).toString(); - - apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + md5Content + pluginType; - } - - return apiName; -}; - const enabledSchema = (tools: string[] = []) => (s: ToolStoreState): ChatCompletionTool[] => { @@ -41,7 +22,7 @@ const enabledSchema = .flatMap((manifest) => manifest.api.map((m) => ({ description: m.description, - name: getAPIName(manifest.identifier, m.name, manifest.type), + name: genToolCallingName(manifest.identifier, m.name, manifest.type), parameters: m.parameters, })), ); @@ -67,9 +48,10 @@ const enabledSystemRoles = const methods = manifest.api .map((m) => - [`#### ${getAPIName(manifest.identifier, m.name, manifest.type)}`, m.description].join( - '\n\n', - ), + [ + `#### ${genToolCallingName(manifest.identifier, m.name, manifest.type)}`, + m.description, + ].join('\n\n'), ) .join('\n\n'); diff --git a/src/types/message/index.ts b/src/types/message/index.ts index c0bed2b61f99..5f18ed5b2af0 100644 --- a/src/types/message/index.ts +++ b/src/types/message/index.ts @@ -5,7 +5,7 @@ import { ErrorType } from '@/types/fetch'; import { LLMRoleType } from '../llm'; import { BaseDataModel } from '../meta'; -import { ChatPluginPayload, ChatToolPayload, MessageToolCall } from './tools'; +import { ChatPluginPayload, ChatToolPayload } from './tools'; import { Translate } from './translate'; /** @@ -55,9 +55,7 @@ export interface ChatMessage extends BaseDataModel { * parent message id */ parentId?: string; - /** - * @deprecated - */ + plugin?: ChatPluginPayload; pluginState?: any; @@ -71,8 +69,7 @@ export interface ChatMessage extends BaseDataModel { role: LLMRoleType; sessionId?: string; - tool?: ChatToolPayload; - tool_calls?: MessageToolCall[]; + tool_call_id?: string; tools?: ChatToolPayload[]; /** diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts index 5ffc062cc990..d89cae89a9c9 100644 --- a/src/utils/fetch.ts +++ b/src/utils/fetch.ts @@ -56,6 +56,7 @@ interface MessageToolCallsChunk { } export interface FetchSSEOptions { + fetcher?: typeof fetch; onAbort?: (text: string) => Promise; onErrorHandle?: (error: ChatMessageError) => void; onFinish?: OnFinishHandler; @@ -86,6 +87,7 @@ export const parseToolCalls = (origin: MessageToolCall[], value: MessageToolCall export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptions = {}) => { let output = ''; let toolCalls: undefined | MessageToolCall[]; + let triggerOnMessageHandler = false; let finishedType: SSEFinishType = 'done'; let response!: Response; @@ -93,6 +95,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio try { await fetchEventSource(url, { body: options.body, + fetch: options?.fetcher, headers: options.headers as Record, method: options.method, onerror: (error) => { @@ -107,7 +110,16 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio // options.onErrorHandle() }, onmessage: (ev) => { - const data = JSON.parse(ev.data); + triggerOnMessageHandler = true; + let data; + try { + data = JSON.parse(ev.data); + } catch (e) { + console.warn('parse error, fallback to stream', e); + options.onMessageHandle?.({ text: data, 
type: 'text' }); + return; + } + switch (ev.event) { case 'text': { output += data; @@ -145,9 +157,18 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio }); } catch {} - const traceId = response.headers.get(LOBE_CHAT_TRACE_ID); - const observationId = response.headers.get(LOBE_CHAT_OBSERVATION_ID); - await options?.onFinish?.(output, { observationId, toolCalls, traceId, type: finishedType }); + // only call onFinish when response is available + // so like abort, we don't need to call onFinish + if (response) { + // if there is no onMessageHandler, we should call onHandleMessage first + if (!triggerOnMessageHandler) { + options.onMessageHandle?.({ text: await response.clone().text(), type: 'text' }); + } + + const traceId = response.headers.get(LOBE_CHAT_TRACE_ID); + const observationId = response.headers.get(LOBE_CHAT_OBSERVATION_ID); + await options?.onFinish?.(output, { observationId, toolCalls, traceId, type: finishedType }); + } return response; }; diff --git a/src/utils/toolCall.ts b/src/utils/toolCall.ts new file mode 100644 index 000000000000..59046f697992 --- /dev/null +++ b/src/utils/toolCall.ts @@ -0,0 +1,21 @@ +import { Md5 } from 'ts-md5'; + +import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; + +export const genToolCallingName = (identifier: string, name: string, type?: string) => { + const pluginType = type && type !== 'default' ? `${PLUGIN_SCHEMA_SEPARATOR + type}` : ''; + + // 将插件的 identifier 作为前缀,避免重复 + let apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + name + pluginType; + + // OpenAI GPT function_call name can't be longer than 64 characters + // So we need to use md5 to shorten the name + // and then find the correct apiName in response by md5 + if (apiName.length >= 64) { + const md5Content = PLUGIN_SCHEMA_API_MD5_PREFIX + Md5.hashStr(name).toString(); + + apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + md5Content + pluginType; + } + + return apiName; +}; From 5d1ed62758d15a92d1b036a617a14d9da66b8fd6 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 03:18:52 +0000 Subject: [PATCH 03/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20zhipu=20GLM?= =?UTF-8?q?=20tool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/zhipu.ts | 8 ++------ src/libs/agent-runtime/zhipu/index.ts | 5 +++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/config/modelProviders/zhipu.ts b/src/config/modelProviders/zhipu.ts index 55a51950fbfe..19d400454b2f 100644 --- a/src/config/modelProviders/zhipu.ts +++ b/src/config/modelProviders/zhipu.ts @@ -1,9 +1,5 @@ import { ModelProviderCard } from '@/types/llm'; -// TODO: 等待 ZhiPu 修复 API 问题后开启 functionCall -// 暂时不透出 GLM 系列的 function_call 功能 -// refs https://github.com/lobehub/lobe-chat/discussions/737#discussioncomment-8315815 - // ref https://open.bigmodel.cn/dev/howuse/model const ZhiPu: ModelProviderCard = { chatModels: [ @@ -11,7 +7,7 @@ const ZhiPu: ModelProviderCard = { description: '最新的 GLM-4 、最大支持 128k 上下文、支持 Function Call 、Retreival', displayName: 'GLM-4', enabled: true, - // functionCall: true, + functionCall: true, id: 'glm-4', tokens: 128_000, }, @@ -28,7 +24,7 @@ const ZhiPu: ModelProviderCard = { description: '最新的glm-3-turbo、最大支持 128k上下文、支持Function Call、Retreival', displayName: 'GLM-3 Turbo', enabled: true, - // functionCall: true, + functionCall: true, id: 'glm-3-turbo', tokens: 128_000, }, diff --git a/src/libs/agent-runtime/zhipu/index.ts 
b/src/libs/agent-runtime/zhipu/index.ts index 325de0b04e0b..384a50fff358 100644 --- a/src/libs/agent-runtime/zhipu/index.ts +++ b/src/libs/agent-runtime/zhipu/index.ts @@ -1,4 +1,3 @@ -import { OpenAIStream, StreamingTextResponse } from 'ai'; import OpenAI, { ClientOptions } from 'openai'; import { LobeRuntimeAI } from '../BaseAI'; @@ -13,6 +12,8 @@ import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; import { desensitizeUrl } from '../utils/desensitizeUrl'; import { handleOpenAIError } from '../utils/handleOpenAIError'; +import { StreamingResponse } from '../utils/response'; +import { OpenAIStream } from '../utils/streams'; import { parseDataUri } from '../utils/uriParser'; import { generateApiToken } from './authToken'; @@ -63,7 +64,7 @@ export class LobeZhipuAI implements LobeRuntimeAI { debugStream(debug.toReadableStream()).catch(console.error); } - return new StreamingTextResponse(OpenAIStream(prod, options?.callback), { + return StreamingResponse(OpenAIStream(prod, options?.callback), { headers: options?.headers, }); } catch (error) { From 326102e66e68a14ac4cd853af1436eeeb96b7024 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 04:16:09 +0000 Subject: [PATCH 04/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20moonshot=20?= =?UTF-8?q?function=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/moonshot.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/config/modelProviders/moonshot.ts b/src/config/modelProviders/moonshot.ts index 9c4916ab7c20..f5a63d4eefb2 100644 --- a/src/config/modelProviders/moonshot.ts +++ b/src/config/modelProviders/moonshot.ts @@ -6,18 +6,21 @@ const Moonshot: ModelProviderCard = { { displayName: 'Moonshot V1 8K', enabled: true, + functionCall: true, id: 'moonshot-v1-8k', tokens: 8192, }, { displayName: 'Moonshot V1 32K', enabled: true, + functionCall: true, id: 'moonshot-v1-32k', tokens: 32_768, }, { displayName: 'Moonshot V1 128K', enabled: true, + functionCall: true, id: 'moonshot-v1-128k', tokens: 128_000, }, From 6cf086c9a32c0e704adc78a8007e29d4a0f3fd8b Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 04:31:40 +0000 Subject: [PATCH 05/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20groq=20tool?= =?UTF-8?q?=20callings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/groq.ts | 6 +- src/libs/agent-runtime/groq/index.ts | 7 ++ .../utils/openaiCompatibleFactory/index.ts | 73 ++++++++++++++++++- 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/src/config/modelProviders/groq.ts b/src/config/modelProviders/groq.ts index 2a3a71b5dc3a..12f112946d46 100644 --- a/src/config/modelProviders/groq.ts +++ b/src/config/modelProviders/groq.ts @@ -6,24 +6,28 @@ const Groq: ModelProviderCard = { { displayName: 'LLaMA3-3-70B', enabled: true, + functionCall: true, id: 'llama3-70b-8192', tokens: 8192, }, { - displayName: 'Mixtral-8x7b-Instruct-v0.1', + displayName: 'Mixtral-8x7b', enabled: true, + functionCall: true, id: 'mixtral-8x7b-32768', tokens: 32_768, }, { displayName: 'Gemma-7b-it', enabled: true, + functionCall: true, id: 'gemma-7b-it', tokens: 8192, }, { displayName: 'LLaMA3-3-8B', enabled: true, + functionCall: true, id: 'llama3-8b-8192', tokens: 8192, }, diff --git a/src/libs/agent-runtime/groq/index.ts b/src/libs/agent-runtime/groq/index.ts index f30c1a9c159d..0774f0f8da6e 100644 --- a/src/libs/agent-runtime/groq/index.ts 
+++ b/src/libs/agent-runtime/groq/index.ts @@ -10,6 +10,13 @@ export const LobeGroq = LobeOpenAICompatibleFactory({ if (error.status === 403) return { error, errorType: AgentRuntimeErrorType.LocationNotSupportError }; }, + handlePayload: (payload) => { + return { + ...payload, + // disable stream for tools due to groq dont support + stream: !payload.tools, + } as any; + }, }, debug: { chatCompletion: () => process.env.DEBUG_GROQ_CHAT_COMPLETION === '1', diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts index 1c58bf5b1723..dda87d6ffab3 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts @@ -81,13 +81,26 @@ export const LobeOpenAICompatibleFactory = ({ signal: options?.signal, }); - const [prod, useForDebug] = response.tee(); + if (postPayload.stream) { + const [prod, useForDebug] = response.tee(); + + if (debug?.chatCompletion?.()) { + debugStream(useForDebug.toReadableStream()).catch(console.error); + } + + return StreamingResponse(OpenAIStream(prod, options?.callback), { + headers: options?.headers, + }); + } if (debug?.chatCompletion?.()) { - debugStream(useForDebug.toReadableStream()).catch(console.error); + console.log('\n[no stream response]\n'); + console.log(JSON.stringify(response) + '\n'); } - return StreamingResponse(OpenAIStream(prod, options?.callback), { + const stream = this.transformResponseToStream(response as unknown as OpenAI.ChatCompletion); + + return StreamingResponse(OpenAIStream(stream, options?.callback), { headers: options?.headers, }); } catch (error) { @@ -161,4 +174,58 @@ export const LobeOpenAICompatibleFactory = ({ .filter(Boolean) as ChatModelCard[]; } + + /** + * make the OpenAI response data as a stream + * @private + */ + private transformResponseToStream(data: OpenAI.ChatCompletion) { + return new ReadableStream({ + start(controller) { + const chunk: OpenAI.ChatCompletionChunk = { + choices: data.choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ + delta: { + content: choice.message.content, + role: choice.message.role, + tool_calls: choice.message.tool_calls?.map( + (tool, index): OpenAI.ChatCompletionChunk.Choice.Delta.ToolCall => ({ + function: tool.function, + id: tool.id, + index, + type: tool.type, + }), + ), + }, + finish_reason: null, + index: choice.index, + logprobs: choice.logprobs, + })), + created: data.created, + id: data.id, + model: data.model, + object: 'chat.completion.chunk', + }; + + controller.enqueue(chunk); + + controller.enqueue({ + choices: data.choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ + delta: { + content: choice.message.content, + role: choice.message.role, + }, + finish_reason: choice.finish_reason, + index: choice.index, + logprobs: choice.logprobs, + })), + created: data.created, + id: data.id, + model: data.model, + object: 'chat.completion.chunk', + system_fingerprint: data.system_fingerprint, + } as OpenAI.ChatCompletionChunk); + controller.close(); + }, + }); + } }; From dde5582d1c247914e547c771981a365a49b78dbb Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 05:57:06 +0000 Subject: [PATCH 06/24] =?UTF-8?q?=E2=9C=A8=20feat:=20azure=20openai=20stre?= =?UTF-8?q?am=20tool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/azureOpenai/index.ts | 11 ++++++----- src/libs/agent-runtime/utils/streams/openai.ts | 7 ++++--- 2 files changed, 10 
insertions(+), 8 deletions(-) diff --git a/src/libs/agent-runtime/azureOpenai/index.ts b/src/libs/agent-runtime/azureOpenai/index.ts index 479ddcd409ca..113245bcbbe3 100644 --- a/src/libs/agent-runtime/azureOpenai/index.ts +++ b/src/libs/agent-runtime/azureOpenai/index.ts @@ -4,13 +4,14 @@ import { GetChatCompletionsOptions, OpenAIClient, } from '@azure/openai'; -import { OpenAIStream, StreamingTextResponse } from 'ai'; import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType } from '../error'; import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types'; import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { OpenAIStream } from '../utils/streams'; export class LobeAzureOpenAI implements LobeRuntimeAI { client: OpenAIClient; @@ -40,15 +41,15 @@ export class LobeAzureOpenAI implements LobeRuntimeAI { { ...params, abortSignal: options?.signal, maxTokens } as GetChatCompletionsOptions, ); - const stream = OpenAIStream(response as any); - - const [debug, prod] = stream.tee(); + const [debug, prod] = response.tee(); if (process.env.DEBUG_AZURE_CHAT_COMPLETION === '1') { debugStream(debug).catch(console.error); } - return new StreamingTextResponse(prod); + return StreamingResponse(OpenAIStream(prod, options?.callback), { + headers: options?.headers, + }); } catch (e) { let error = e as { [key: string]: any; code: string; message: string }; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index dd8010fffe36..803e2b01578b 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -6,11 +6,12 @@ import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './protocol'; export const OpenAIStream = ( - stream: Stream, + stream: Stream | ReadableStream, callbacks?: ChatStreamCallbacks, ) => { - return stream - .toReadableStream() + const readableStream = stream instanceof ReadableStream ? 
stream : stream.toReadableStream(); + + return readableStream .pipeThrough( new TransformStream({ transform: (chunk, controller) => { From ad30764fd48f48b61f7f64d63d87a5a532f47990 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 09:18:25 +0000 Subject: [PATCH 07/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20minimax=20t?= =?UTF-8?q?ool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/minimax.ts | 17 ++++--- src/config/server/provider.ts | 2 +- src/libs/agent-runtime/minimax/index.ts | 51 +++++-------------- src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/minimax.ts | 39 ++++++++++++++ .../agent-runtime/utils/streams/openai.ts | 11 +++- .../agent-runtime/utils/streams/protocol.ts | 47 ++++++++++------- 7 files changed, 103 insertions(+), 65 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/minimax.ts diff --git a/src/config/modelProviders/minimax.ts b/src/config/modelProviders/minimax.ts index f3cd3a60b6bc..2fbfb0941d8e 100644 --- a/src/config/modelProviders/minimax.ts +++ b/src/config/modelProviders/minimax.ts @@ -3,24 +3,27 @@ import { ModelProviderCard } from '@/types/llm'; // ref https://www.minimaxi.com/document/guides/chat-model/pro/api const Minimax: ModelProviderCard = { chatModels: [ - { - description: '复杂场景,例如应用题计算、科学计算等场景', - displayName: 'abab6.5', - enabled: true, - id: 'abab6.5-chat', - tokens: 8192, - }, { description: '通用场景', displayName: 'abab6.5s', enabled: true, + functionCall: true, id: 'abab6.5s-chat', tokens: 245_760, }, + { + description: '复杂场景,例如应用题计算、科学计算等场景', + displayName: 'abab6.5', + enabled: true, + functionCall: true, + id: 'abab6.5-chat', + tokens: 8192, + }, { description: '更复杂的格式化文本生成', displayName: 'abab6', enabled: true, + functionCall: true, id: 'abab6-chat', tokens: 32_768, }, diff --git a/src/config/server/provider.ts b/src/config/server/provider.ts index d1d497dab4a4..28b43e91873d 100644 --- a/src/config/server/provider.ts +++ b/src/config/server/provider.ts @@ -211,7 +211,7 @@ export const getProviderConfig = () => { AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY || '', - ENABLE_OLLAMA: process.env.ENABLE_OLLAMA as unknown as boolean, + ENABLE_OLLAMA: Boolean(process.env.ENABLE_OLLAMA), OLLAMA_PROXY_URL: process.env.OLLAMA_PROXY_URL || '', OLLAMA_MODEL_LIST: process.env.OLLAMA_MODEL_LIST || process.env.OLLAMA_CUSTOM_MODELS, }; diff --git a/src/libs/agent-runtime/minimax/index.ts b/src/libs/agent-runtime/minimax/index.ts index 9c850482e490..85c372069a17 100644 --- a/src/libs/agent-runtime/minimax/index.ts +++ b/src/libs/agent-runtime/minimax/index.ts @@ -1,9 +1,6 @@ -import { StreamingTextResponse } from 'ai'; import { isEmpty } from 'lodash-es'; import OpenAI from 'openai'; -import { debugStream } from '@/libs/agent-runtime/utils/debugStream'; - import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType } from '../error'; import { @@ -13,6 +10,9 @@ import { ModelProvider, } from '../types'; import { AgentRuntimeError } from '../utils/createError'; +import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { MinimaxStream } from '../utils/streams'; interface MinimaxBaseResponse { base_resp?: { @@ -69,18 +69,8 @@ export class LobeMinimaxAI implements LobeRuntimeAI { this.apiKey = apiKey; } - async chat( - payload: ChatStreamPayload, - options?: ChatCompetitionOptions, - ): Promise { + 
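+  // POSTs to the Minimax chat completion endpoint, fails fast if the first SSE
+  // frame carries an error, then adapts the body stream to the shared protocol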
async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions): Promise<Response> {
     try {
-      let streamController: ReadableStreamDefaultController | undefined;
-      const readableStream = new ReadableStream({
-        start(controller) {
-          streamController = controller;
-        },
-      });
-
       const response = await fetch('https://api.minimax.chat/v1/text/chatcompletion_v2', {
         body: JSON.stringify(this.buildCompletionsParams(payload)),
         headers: {
@@ -107,12 +97,10 @@ export class LobeMinimaxAI implements LobeRuntimeAI {
         debugStream(debug).catch(console.error);
       }

-      this.parseResponse(prod.getReader(), streamController);
-
       // wait for the first response, and throw if minimax returns an error
       await this.parseFirstResponse(prod2.getReader());

-      return new StreamingTextResponse(readableStream, { headers: options?.headers });
+      return StreamingResponse(MinimaxStream(prod), { headers: options?.headers });
     } catch (error) {
       console.log('error', error);
       const err = error as Error | ChatCompletionErrorPayload;
@@ -154,30 +142,19 @@
       max_tokens: this.getMaxTokens(payload.model),
       stream: true,
       temperature: temperature === 0 ? undefined : temperature,
+
+      tools: params.tools?.map((tool) => ({
+        function: {
+          description: tool.function.description,
+          name: tool.function.name,
+          parameters: JSON.stringify(tool.function.parameters),
+        },
+        type: 'function',
+      })),
       top_p: top_p === 0 ? undefined : top_p,
     };
   }

-  private async parseResponse(
-    reader: ReadableStreamDefaultReader,
-    streamController: ReadableStreamDefaultController | undefined,
-  ) {
-    const encoder = new TextEncoder();
-    const decoder = new TextDecoder();
-    let done = false;
-
-    while (!done) {
-      const { value, done: doneReading } = await reader.read();
-      done = doneReading;
-      const chunkValue = decoder.decode(value, { stream: true });
-      const data = parseMinimaxResponse(chunkValue);
-      const text = data?.choices?.at(0)?.delta?.content || undefined;
-      streamController?.enqueue(encoder.encode(text));
-    }
-
-    streamController?.close();
-  }
-
   private async parseFirstResponse(reader: ReadableStreamDefaultReader) {
     const decoder = new TextDecoder();

diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts
index 0a8bed033dba..1e2d3f465d57 100644
--- a/src/libs/agent-runtime/utils/streams/index.ts
+++ b/src/libs/agent-runtime/utils/streams/index.ts
@@ -1,2 +1,3 @@
 export * from './openai';
+export * from './minimax';
 export * from './protocol';
diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts
new file mode 100644
index 000000000000..cdab8bc26f32
--- /dev/null
+++ b/src/libs/agent-runtime/utils/streams/minimax.ts
@@ -0,0 +1,39 @@
+import { createCallbacksTransformer } from 'ai';
+import OpenAI from 'openai';
+
+import { ChatStreamCallbacks } from '../../types';
+import { transformOpenAIStream } from './protocol';
+
+const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => {
+  const decoder = new TextDecoder();
+
+  let chunkValue = decoder.decode(unit8Array, { stream: true });
+
+  // chunkValue example:
+  // data: {"id":"028a65377137d57aaceeffddf48ae99f","choices":[{"finish_reason":"tool_calls","index":0,"delta":{"role":"assistant","tool_calls":[{"id":"call_function_7371372822","type":"function","function":{"name":"realtime-weather____fetchCurrentWeather","arguments":"{\"city\": [\"杭州\", \"北京\"]}"}}]}}],"created":155511,"model":"abab6.5s-chat","object":"chat.completion.chunk"}
+
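+  // note: this helper assumes each incoming buffer carries exactly one complete
+  // `data:` frame; a frame split across network chunks would fail the JSON.parse below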
+ // so we need to remove `data:` prefix and then parse it as JSON + if (chunkValue.startsWith('data:')) { + chunkValue = chunkValue.slice(5).trim(); + } + + return JSON.parse(chunkValue); +}; + +export const MinimaxStream = (stream: ReadableStream, callbacks?: ChatStreamCallbacks) => { + return stream + .pipeThrough( + new TransformStream({ + transform: (buffer, controller) => { + const chunk = unit8ArrayToJSONChunk(buffer); + + const { type, id, data } = transformOpenAIStream(chunk); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 803e2b01578b..8b621d2bbae1 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -1,15 +1,22 @@ -import { createCallbacksTransformer } from 'ai'; +import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './protocol'; +const chatStreamable = async function* (stream: AsyncIterable) { + for await (const response of stream) { + yield response; + } +}; + export const OpenAIStream = ( stream: Stream | ReadableStream, callbacks?: ChatStreamCallbacks, ) => { - const readableStream = stream instanceof ReadableStream ? stream : stream.toReadableStream(); + const readableStream = + stream instanceof ReadableStream ? stream : readableFromAsyncIterable(chatStreamable(stream)); return readableStream .pipeThrough( diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index d2a5b873147f..658c36b71e1b 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,41 +1,52 @@ import OpenAI from 'openai'; -interface StreamProtocolChunk { +export interface StreamProtocolChunk { data: any; id: string; type: 'text' | 'tool_calls' | 'data' | 'stop'; } -export const transformOpenAIStream = (chunk: Uint8Array): StreamProtocolChunk => { - const decoder = new TextDecoder(); - - const chunkValue = decoder.decode(chunk, { stream: true }); - const jsonValue: OpenAI.ChatCompletionChunk = JSON.parse(chunkValue); - +export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices - const item = jsonValue.choices[0]; + const item = chunk.choices[0]; - if (typeof item.delta.content === 'string') { - return { data: item.delta.content, id: jsonValue.id, type: 'text' }; + if (typeof item.delta?.content === 'string') { + return { data: item.delta.content, id: chunk.id, type: 'text' }; } - if (item.delta.tool_calls) { - return { data: item.delta.tool_calls, id: jsonValue.id, type: 'tool_calls' }; - } + if (item.delta?.tool_calls) { + return { + data: item.delta.tool_calls.map((value, index) => ({ + ...value, - if (item.delta.content === null) { - return { data: item.delta, id: jsonValue.id, type: 'data' }; + // mistral's tool calling don't have index and function field, it's data like: + // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and 
wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] + + // minimax's tool calling don't have index field, it's data like: + // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] + + // so we need to add these default values + index: typeof value.index !== 'undefined' ? value.index : index, + type: value.type || 'function', + })), + id: chunk.id, + type: 'tool_calls', + }; } // 给定结束原因 if (item.finish_reason) { - return { data: item.finish_reason, id: jsonValue.id, type: 'stop' }; + return { data: item.finish_reason, id: chunk.id, type: 'stop' }; + } + + if (item.delta.content === null) { + return { data: item.delta, id: chunk.id, type: 'data' }; } // 其余情况下,返回 delta 和 index return { - data: { delta: item.delta, id: jsonValue.id, index: item.index }, - id: jsonValue.id, + data: { delta: item.delta, id: chunk.id, index: item.index }, + id: chunk.id, type: 'data', }; }; From 1292e802b360cfdb9750910ac65899bb6013b2a6 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Thu, 9 May 2024 02:12:39 +0800 Subject: [PATCH 08/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20mistral=20t?= =?UTF-8?q?ool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/mistral.ts | 1 + src/libs/agent-runtime/mistral/index.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/src/config/modelProviders/mistral.ts b/src/config/modelProviders/mistral.ts index e2f870b8af8e..959362fd0686 100644 --- a/src/config/modelProviders/mistral.ts +++ b/src/config/modelProviders/mistral.ts @@ -18,6 +18,7 @@ const Mistral: ModelProviderCard = { { displayName: 'Mixtral 8x22B', enabled: true, + functionCall: true, id: 'open-mixtral-8x22b', tokens: 65_536, }, diff --git a/src/libs/agent-runtime/mistral/index.ts b/src/libs/agent-runtime/mistral/index.ts index cd23f01f52f5..5bd1980a228a 100644 --- a/src/libs/agent-runtime/mistral/index.ts +++ b/src/libs/agent-runtime/mistral/index.ts @@ -11,6 +11,7 @@ export const LobeMistralAI = LobeOpenAICompatibleFactory({ model: payload.model, stream: true, temperature: payload.temperature, + tools: payload.tools, top_p: payload.top_p, }), }, From 7e687231d3803b3eed85617eeb17fc02ffc4b4bf Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Thu, 9 May 2024 05:54:31 +0000 Subject: [PATCH 09/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20google=20to?= =?UTF-8?q?ol=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/google.ts | 3 + src/libs/agent-runtime/google/index.ts | 109 +++++++++++++++-- src/libs/agent-runtime/utils/debugStream.ts | 48 ++++++-- .../agent-runtime/utils/streams/google-ai.ts | 110 ++++++++++++++++++ src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/minimax.ts | 2 +- .../agent-runtime/utils/streams/openai.ts | 50 +++++++- .../agent-runtime/utils/streams/protocol.ts | 54 +++------ 8 files changed, 316 insertions(+), 61 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/google-ai.ts diff --git a/src/config/modelProviders/google.ts 
b/src/config/modelProviders/google.ts index 3f5586f326e6..27e0566cd4d2 100644 --- a/src/config/modelProviders/google.ts +++ b/src/config/modelProviders/google.ts @@ -22,6 +22,7 @@ const Google: ModelProviderCard = { description: 'The best model for scaling across a wide range of tasks', displayName: 'Gemini 1.0 Pro', enabled: true, + functionCall: true, id: 'gemini-pro', maxOutput: 2048, tokens: 30_720 + 2048, @@ -47,6 +48,7 @@ const Google: ModelProviderCard = { description: 'The best model for scaling across a wide range of tasks. This is a stable model that supports tuning.', displayName: 'Gemini 1.0 Pro 001 (Tuning)', + functionCall: true, id: 'gemini-1.0-pro-001', maxOutput: 2048, tokens: 30_720 + 2048, @@ -71,6 +73,7 @@ const Google: ModelProviderCard = { description: 'Mid-size multimodal model that supports up to 1 million tokens', displayName: 'Gemini 1.5 Pro', enabled: true, + functionCall: true, id: 'gemini-1.5-pro-latest', maxOutput: 8192, tokens: 1_048_576 + 8192, diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index c3f83ccc0e38..e9984be47799 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -1,10 +1,20 @@ -import { Content, GoogleGenerativeAI, Part } from '@google/generative-ai'; -import { GoogleGenerativeAIStream, StreamingTextResponse } from 'ai'; +import { + Content, + FunctionDeclaration, + FunctionDeclarationSchemaProperty, + FunctionDeclarationSchemaType, + Tool as GoogleFunctionCallTool, + GoogleGenerativeAI, + Part, +} from '@google/generative-ai'; +import { JSONSchema7 } from 'json-schema'; +import { transform } from 'lodash-es'; import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType, ILobeAgentRuntimeErrorType } from '../error'; import { ChatCompetitionOptions, + ChatCompletionTool, ChatStreamPayload, OpenAIChatMessage, UserMessageContentPart, @@ -12,6 +22,8 @@ import { import { ModelProvider } from '../types/type'; import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { GoogleGenerativeAIStream, googleGenAIResultToStream } from '../utils/streams'; import { parseDataUri } from '../utils/uriParser'; enum HarmCategory { @@ -42,7 +54,7 @@ export class LobeGoogleAI implements LobeRuntimeAI { const contents = this.buildGoogleMessages(payload.messages, model); - const geminiStream = await this.client + const geminiStreamResult = await this.client .getGenerativeModel( { generationConfig: { @@ -74,19 +86,20 @@ export class LobeGoogleAI implements LobeRuntimeAI { }, { apiVersion: 'v1beta', baseUrl: this.baseURL }, ) - .generateContentStream({ contents }); - - // Convert the response into a friendly text-stream - const stream = GoogleGenerativeAIStream(geminiStream, options?.callback); + .generateContentStream({ contents, tools: this.buildGoogleTools(payload.tools) }); - const [debug, output] = stream.tee(); + const googleStream = googleGenAIResultToStream(geminiStreamResult); + const [prod, useForDebug] = googleStream.tee(); if (process.env.DEBUG_GOOGLE_CHAT_COMPLETION === '1') { - debugStream(debug).catch(console.error); + debugStream(useForDebug).catch(); } + // Convert the response into a friendly text-stream + const stream = GoogleGenerativeAIStream(prod, options?.callback); + // Respond with the stream - return new StreamingTextResponse(output, { headers: options?.headers }); + return StreamingResponse(stream, { headers: options?.headers 
});
     } catch (e) {
       const err = e as Error;
@@ -226,6 +239,82 @@ export class LobeGoogleAI implements LobeRuntimeAI {
       return defaultError;
     }
   }
+
+  private buildGoogleTools(
+    tools: ChatCompletionTool[] | undefined,
+  ): GoogleFunctionCallTool[] | undefined {
+    if (!tools || tools.length === 0) return;
+
+    return [
+      {
+        functionDeclarations: tools.map((tool) => {
+          const t = this.convertToolToGoogleTool(tool);
+          console.log('output Schema', t);
+          return t;
+        }),
+      },
+    ];
+  }
+
+  private convertToolToGoogleTool = (tool: ChatCompletionTool): FunctionDeclaration => {
+    const functionDeclaration = tool.function;
+    const parameters = functionDeclaration.parameters;
+
+    console.log('input Schema', JSON.stringify(parameters, null, 2));
+
+    return {
+      description: functionDeclaration.description,
+      name: functionDeclaration.name,
+      parameters: {
+        description: parameters?.description,
+        properties: transform(parameters?.properties, (result, value, key: string) => {
+          result[key] = this.convertSchemaObject(value as JSONSchema7);
+        }),
+        required: parameters?.required,
+        type: FunctionDeclarationSchemaType.OBJECT,
+      },
+    };
+  };
+
+  private convertSchemaObject(schema: JSONSchema7): FunctionDeclarationSchemaProperty {
+    console.log('input:', schema);
+
+    switch (schema.type) {
+      default:
+      case 'object': {
+        return {
+          ...schema,
+          properties: Object.fromEntries(
+            Object.entries(schema.properties || {}).map(([key, value]) => [
+              key,
+              this.convertSchemaObject(value as JSONSchema7),
+            ]),
+          ),
+          type: FunctionDeclarationSchemaType.OBJECT,
+        } as any;
+      }
+
+      case 'array': {
+        return {
+          ...schema,
+          items: this.convertSchemaObject(schema.items as JSONSchema7),
+          type: FunctionDeclarationSchemaType.ARRAY,
+        } as any;
+      }
+
+      case 'string': {
+        return { ...schema, type: FunctionDeclarationSchemaType.STRING } as any;
+      }
+
+      case 'number': {
+        return { ...schema, type: FunctionDeclarationSchemaType.NUMBER } as any;
+      }
+
+      case 'boolean': {
+        return { ...schema, type: FunctionDeclarationSchemaType.BOOLEAN } as any;
+      }
+    }
+  }
 }

 export default LobeGoogleAI;
diff --git a/src/libs/agent-runtime/utils/debugStream.ts b/src/libs/agent-runtime/utils/debugStream.ts
index 75dffe462ff1..4fbed582fa12 100644
--- a/src/libs/agent-runtime/utils/debugStream.ts
+++ b/src/libs/agent-runtime/utils/debugStream.ts
@@ -1,18 +1,48 @@
+// no need to pull in a date package just to get the current time: this module is only a debug utility
+const getTime = () => {
+  const date = new Date();
+  return `${date.getFullYear()}-${date.getMonth() + 1}-${date.getDate()} ${date.getHours()}:${date.getMinutes()}:${date.getSeconds()}.${date.getMilliseconds()}`;
+};
+
 export const debugStream = async (stream: ReadableStream) => {
-  let done = false;
+  let finished = false;
   let chunk = 0;
+  let chunkValue: any;
   const decoder = new TextDecoder();
   const reader = stream.getReader();
-  while (!done) {
-    const { value, done: _done } = await reader.read();
-    const chunkValue = decoder.decode(value, { stream: true });
-    if (!_done) {
-      console.log(`[chunk ${chunk}]`);
+
+  console.log(`[stream start] ${getTime()}`);
+
+  while (!finished) {
+    try {
+      const { value, done } = await reader.read();
+
+      if (done) {
+        console.log(`[stream finished] total chunks: ${chunk}\n`);
+        finished = true;
+        break;
+      }
+
+      chunkValue = value;
+
+      // if the value is an ArrayBuffer, we need to decode it
+      if ('byteLength' in value) {
+        chunkValue = decoder.decode(value, { stream: true });
+      } else if (typeof value !== 'string') {
+        chunkValue = JSON.stringify(value);
+      }
+
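+      // with a DEBUG_*_CHAT_COMPLETION flag enabled, this prints lines like:
+      //   [chunk 3] 2024-5-9 12:30:1.123
+      //   {"id":"...","choices":[...]}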
console.log(`[chunk ${chunk}] ${getTime()}`); console.log(chunkValue); - } + console.log(`\n`); - done = _done; - chunk++; + finished = done; + chunk++; + } catch (e) { + finished = true; + console.error('[debugStream error]', e); + console.error('[error chunk value:]', chunkValue); + } } }; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts new file mode 100644 index 000000000000..cf41e51d1747 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -0,0 +1,110 @@ +import { + EnhancedGenerateContentResponse, + GenerateContentStreamResult, +} from '@google/generative-ai'; +import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; + +import { nanoid } from '@/utils/uuid'; + +import { ChatStreamCallbacks } from '../../types'; +import { + StreamProtocolChunk, + StreamToolCallChunk, + chatStreamable, + generateToolCallId, +} from './protocol'; + +const transformGoogleGenerativeAIStream = ( + chunk: EnhancedGenerateContentResponse, +): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + const functionCalls = chunk.functionCalls(); + + if (functionCalls) { + return { + data: functionCalls.map( + (value, index): StreamToolCallChunk => ({ + function: { + arguments: JSON.stringify(value.args), + name: value.name, + }, + id: generateToolCallId(index, value.name), + index: index, + type: 'function', + }), + ), + id: nanoid(), + type: 'tool_calls', + }; + } + const text = chunk.text(); + + return { + data: text, + id: nanoid(), + type: 'text', + }; + // if (typeof item.delta?.content === 'string') { + // return { data: item.delta.content, id: chunk.id, type: 'text' }; + // } + // + // if (item.delta?.tool_calls) { + // return { + // data: item.delta.tool_calls.map((value, index) => ({ + // ...value, + // + // // mistral's tool calling don't have index and function field, it's data like: + // // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] + // + // // minimax's tool calling don't have index field, it's data like: + // // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] + // + // // so we need to add these default values + // index: typeof value.index !== 'undefined' ? 
value.index : index, + // type: value.type || 'function', + // })), + // id: chunk.id, + // type: 'tool_calls', + // }; + // } + // + // // 给定结束原因 + // if (item.finish_reason) { + // return { data: item.finish_reason, id: chunk.id, type: 'stop' }; + // } + // + // if (item.delta.content === null) { + // return { data: item.delta, id: chunk.id, type: 'data' }; + // } + // + // // 其余情况下,返回 delta 和 index + // return { + // data: { delta: item.delta, id: chunk.id, index: item.index }, + // id: chunk.id, + // type: 'data', + // }; +}; + +// only use for debug +export const googleGenAIResultToStream = (stream: GenerateContentStreamResult) => { + // make the response to the streamable format + return readableFromAsyncIterable(chatStreamable(stream.stream)); +}; + +export const GoogleGenerativeAIStream = ( + rawStream: ReadableStream, + callbacks?: ChatStreamCallbacks, +) => + rawStream + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformGoogleGenerativeAIStream(chunk); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 1e2d3f465d57..47bec33c4374 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,3 +1,4 @@ +export * from './google-ai'; export * from './openai'; export * from './minimax'; export * from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts index cdab8bc26f32..cb53534d6468 100644 --- a/src/libs/agent-runtime/utils/streams/minimax.ts +++ b/src/libs/agent-runtime/utils/streams/minimax.ts @@ -2,7 +2,7 @@ import { createCallbacksTransformer } from 'ai'; import OpenAI from 'openai'; import { ChatStreamCallbacks } from '../../types'; -import { transformOpenAIStream } from './protocol'; +import { transformOpenAIStream } from './openai'; const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => { const decoder = new TextDecoder(); diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 8b621d2bbae1..4b6bbb401170 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -3,7 +3,55 @@ import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; import { ChatStreamCallbacks } from '../../types'; -import { transformOpenAIStream } from './protocol'; +import { StreamProtocolChunk, StreamToolCallChunk, generateToolCallId } from './protocol'; + +export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + const item = chunk.choices[0]; + + if (typeof item.delta?.content === 'string') { + return { data: item.delta.content, id: chunk.id, type: 'text' }; + } + + if (item.delta?.tool_calls) { + return { + data: item.delta.tool_calls.map( + (value, index): StreamToolCallChunk => ({ + function: value.function, + id: value.id || generateToolCallId(index, value.function?.name), + + // mistral's tool calling don't have index and function field, it's data like: + // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": 
[\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] + + // minimax's tool calling don't have index field, it's data like: + // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] + + // so we need to add these default values + index: typeof value.index !== 'undefined' ? value.index : index, + type: value.type || 'function', + }), + ), + id: chunk.id, + type: 'tool_calls', + }; + } + + // 给定结束原因 + if (item.finish_reason) { + return { data: item.finish_reason, id: chunk.id, type: 'stop' }; + } + + if (item.delta.content === null) { + return { data: item.delta, id: chunk.id, type: 'data' }; + } + + // 其余情况下,返回 delta 和 index + return { + data: { delta: item.delta, id: chunk.id, index: item.index }, + id: chunk.id, + type: 'data', + }; +}; const chatStreamable = async function* (stream: AsyncIterable) { for await (const response of stream) { diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 658c36b71e1b..8c5b21cd0c96 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -6,47 +6,21 @@ export interface StreamProtocolChunk { type: 'text' | 'tool_calls' | 'data' | 'stop'; } -export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { - // maybe need another structure to add support for multiple choices - const item = chunk.choices[0]; - - if (typeof item.delta?.content === 'string') { - return { data: item.delta.content, id: chunk.id, type: 'text' }; - } - - if (item.delta?.tool_calls) { - return { - data: item.delta.tool_calls.map((value, index) => ({ - ...value, - - // mistral's tool calling don't have index and function field, it's data like: - // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] - - // minimax's tool calling don't have index field, it's data like: - // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] - - // so we need to add these default values - index: typeof value.index !== 'undefined' ? 
value.index : index, - type: value.type || 'function', - })), - id: chunk.id, - type: 'tool_calls', - }; - } +export interface StreamToolCallChunk { + function?: { + arguments?: string; + name?: string | null; + }; + id: string; + index: number; + type: 'function' | string; +} - // 给定结束原因 - if (item.finish_reason) { - return { data: item.finish_reason, id: chunk.id, type: 'stop' }; - } +export const generateToolCallId = (index: number, functionName?: string) => + `${functionName || 'unknown_tool_call'}_${index}`; - if (item.delta.content === null) { - return { data: item.delta, id: chunk.id, type: 'data' }; +export const chatStreamable = async function* (stream: AsyncIterable) { + for await (const response of stream) { + yield response; } - - // 其余情况下,返回 delta 和 index - return { - data: { delta: item.delta, id: chunk.id, index: item.index }, - id: chunk.id, - type: 'data', - }; }; From ca4f30beee4434c922c5a37517575fdddba19280 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 00:03:26 +0800 Subject: [PATCH 10/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20anthropic?= =?UTF-8?q?=20tool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 2 +- src/config/modelProviders/anthropic.ts | 3 + src/libs/agent-runtime/anthropic/index.ts | 90 +++++++++++++--- src/libs/agent-runtime/types/chat.ts | 1 + .../agent-runtime/utils/anthropicHelpers.ts | 64 +++++++++-- .../agent-runtime/utils/streams/anthropic.ts | 102 ++++++++++++++++++ .../agent-runtime/utils/streams/google-ai.ts | 4 +- src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/openai.ts | 11 +- .../agent-runtime/utils/streams/protocol.ts | 11 +- 10 files changed, 259 insertions(+), 30 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/anthropic.ts diff --git a/package.json b/package.json index 3ddfca595c23..abe1a4e09871 100644 --- a/package.json +++ b/package.json @@ -82,7 +82,7 @@ }, "dependencies": { "@ant-design/icons": "^5.3.6", - "@anthropic-ai/sdk": "^0.18.0", + "@anthropic-ai/sdk": "^0.20.9", "@auth/core": "0.28.0", "@aws-sdk/client-bedrock-runtime": "^3.565.0", "@azure/openai": "^1.0.0-beta.12", diff --git a/src/config/modelProviders/anthropic.ts b/src/config/modelProviders/anthropic.ts index 93bcfb40f748..01907c7114f7 100644 --- a/src/config/modelProviders/anthropic.ts +++ b/src/config/modelProviders/anthropic.ts @@ -8,6 +8,7 @@ const Anthropic: ModelProviderCard = { 'Ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments', displayName: 'Claude 3 Sonnet', enabled: true, + functionCall: true, id: 'claude-3-sonnet-20240229', maxOutput: 4096, tokens: 200_000, @@ -18,6 +19,7 @@ const Anthropic: ModelProviderCard = { 'Most powerful model for highly complex tasks. Top-level performance, intelligence, fluency, and understanding', displayName: 'Claude 3 Opus', enabled: true, + functionCall: true, id: 'claude-3-opus-20240229', maxOutput: 4096, tokens: 200_000, @@ -28,6 +30,7 @@ const Anthropic: ModelProviderCard = { 'Fastest and most compact model for near-instant responsiveness. 
Quick and accurate targeted performance',
     displayName: 'Claude 3 Haiku',
     enabled: true,
+    functionCall: true,
     id: 'claude-3-haiku-20240307',
     maxOutput: 4096,
     tokens: 200_000,
diff --git a/src/libs/agent-runtime/anthropic/index.ts b/src/libs/agent-runtime/anthropic/index.ts
index 6467ae5ded8a..936ab9b2a9eb 100644
--- a/src/libs/agent-runtime/anthropic/index.ts
+++ b/src/libs/agent-runtime/anthropic/index.ts
@@ -1,7 +1,6 @@
 // sort-imports-ignore
 import '@anthropic-ai/sdk/shims/web';
 import Anthropic from '@anthropic-ai/sdk';
-import { AnthropicStream, StreamingTextResponse } from 'ai';
 import { ClientOptions } from 'openai';

 import { LobeRuntimeAI } from '../BaseAI';
@@ -11,6 +10,8 @@ import { AgentRuntimeError } from '../utils/createError';
 import { debugStream } from '../utils/debugStream';
 import { desensitizeUrl } from '../utils/desensitizeUrl';
 import { buildAnthropicMessages } from '../utils/anthropicHelpers';
+import { StreamingResponse } from '../utils/response';
+import { AnthropicStream } from '../utils/streams';

 const DEFAULT_BASE_URL = 'https://api.anthropic.com';

@@ -30,18 +31,40 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
     try {
       const anthropicPayload = this.buildAnthropicPayload(payload);

-      const response = await this.client.messages.create(
-        { ...anthropicPayload, stream: true },
+      // if there are no tools, we can use the normal streaming chat API
+      if (!anthropicPayload.tools || anthropicPayload.tools.length === 0) {
+        const response = await this.client.messages.create(
+          { ...anthropicPayload, stream: true },
+          {
+            signal: options?.signal,
+          },
+        );
+
+        const [prod, debug] = response.tee();
+
+        if (process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1') {
+          debugStream(debug.toReadableStream()).catch(console.error);
+        }
+
+        return StreamingResponse(AnthropicStream(prod, options?.callback), {
+          headers: options?.headers,
+        });
+      }
+
+      // otherwise we call the (non-streaming) tools beta API
+      const response = await this.client.beta.tools.messages.create(
+        { ...anthropicPayload, stream: false },
         { signal: options?.signal },
       );

-      const [prod, debug] = response.tee();
-
       if (process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1') {
-        debugStream(debug.toReadableStream()).catch(console.error);
+        console.log('\n[no stream response]\n');
+        console.log(JSON.stringify(response) + '\n');
       }

-      return new StreamingTextResponse(AnthropicStream(prod, options?.callback), {
+      const stream = this.transformResponseToStream(response);
+
+      return StreamingResponse(AnthropicStream(stream, options?.callback), {
         headers: options?.headers,
       });
     } catch (error) {
@@ -85,20 +108,59 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
   }

   private buildAnthropicPayload(payload: ChatStreamPayload) {
-    const { messages, model, max_tokens, temperature, top_p } = payload;
+    const { messages, model, max_tokens = 4096, temperature, top_p, tools } = payload;
     const system_message = messages.find((m) => m.role === 'system');
     const user_messages = messages.filter((m) => m.role !== 'system');

     return {
-      max_tokens: max_tokens || 4096,
+      max_tokens,
       messages: buildAnthropicMessages(user_messages),
-      model: model,
-      stream: true,
+      model,
       system: system_message?.content as string,
-      temperature: temperature,
-      top_p: top_p,
-    };
+      temperature,
+      // TODO: the Anthropic SDK doesn't expose a tools interface yet
+      // @ts-ignore
+      tools: tools?.map(
+        (tool): Anthropic.Beta.Tools.Tool => ({
+          description: tool.function.description,
+          input_schema: tool.function.parameters as Anthropic.Beta.Tools.Tool.InputSchema,
+          name: tool.function.name,
+
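+          // e.g. an OpenAI-format tool { type: 'function', function: { name, description, parameters } }
+          // becomes Anthropic's { name, description, input_schema } shape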
}), + ), + top_p, + } satisfies Anthropic.MessageCreateParams; } + + private transformResponseToStream = (response: Anthropic.Beta.Tools.ToolsBetaMessage) => { + return new ReadableStream({ + start(controller) { + response.content.forEach((content) => { + switch (content.type) { + case 'text': { + controller.enqueue({ + delta: { text: content.text, type: 'text_delta' }, + type: 'content_block_delta', + } as Anthropic.ContentBlockDeltaEvent); + break; + } + case 'tool_use': { + controller.enqueue({ + delta: { + tool_use: { id: content.id, input: content.input, name: content.name }, + type: 'tool_use', + }, + type: 'content_block_delta', + } as any); + } + } + }); + + controller.enqueue({ type: 'message_stop' } as Anthropic.MessageStopEvent); + + controller.close(); + }, + }); + }; } export default LobeAnthropicAI; diff --git a/src/libs/agent-runtime/types/chat.ts b/src/libs/agent-runtime/types/chat.ts index b7b5eb918d1d..93aebf994997 100644 --- a/src/libs/agent-runtime/types/chat.ts +++ b/src/libs/agent-runtime/types/chat.ts @@ -32,6 +32,7 @@ export interface OpenAIChatMessage { * @description 消息发送者的角色 */ role: LLMRoleType; + tool_call_id?: string; tool_calls?: MessageToolCall[]; } diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.ts b/src/libs/agent-runtime/utils/anthropicHelpers.ts index 0528f296a38f..02779fddd05d 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.ts @@ -30,13 +30,63 @@ export const buildAnthropicMessage = ( message: OpenAIChatMessage, ): Anthropic.Messages.MessageParam => { const content = message.content as string | UserMessageContentPart[]; - return { - content: typeof content === 'string' ? content : content.map((c) => buildAnthropicBlock(c)), - role: - message.role === 'tool' || message.role === 'function' || message.role === 'system' - ? 'assistant' - : message.role, - }; + + switch (message.role) { + case 'system': { + return { content: content as string, role: 'user' }; + } + + case 'user': { + return { + content: typeof content === 'string' ? 
content : content.map((c) => buildAnthropicBlock(c)),
+        role: 'user',
+      };
+    }
+
+    case 'tool': {
+      // refs: https://docs.anthropic.com/claude/docs/tool-use#tool-use-and-tool-result-content-blocks
+      return {
+        content: [
+          {
+            content: message.content,
+            tool_use_id: message.tool_call_id,
+            type: 'tool_result',
+          } as any,
+        ],
+        role: 'user',
+      };
+    }
+
+    case 'assistant': {
+      // if there are tool_calls, we need to convert them to tool_use content blocks
+      // refs: https://docs.anthropic.com/claude/docs/tool-use#tool-use-and-tool-result-content-blocks
+      if (message.tool_calls) {
+        return {
+          content: [
+            // avoid an empty text content block
+            !!message.content && {
+              text: message.content as string,
+              type: 'text',
+            },
+            ...(message.tool_calls.map((tool) => ({
+              id: tool.id,
+              input: JSON.parse(tool.function.arguments),
+              name: tool.function.name,
+              type: 'tool_use',
+            })) as any),
+          ].filter(Boolean),
+          role: 'assistant',
+        };
+      }
+
+      // or it's a plain assistant message
+      return { content: content as string, role: 'assistant' };
+    }
+
+    case 'function': {
+      return { content: content as string, role: 'assistant' };
+    }
+  }
 };

 export const buildAnthropicMessages = (
diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts
new file mode 100644
index 000000000000..f35561b92f98
--- /dev/null
+++ b/src/libs/agent-runtime/utils/streams/anthropic.ts
@@ -0,0 +1,102 @@
+import Anthropic from '@anthropic-ai/sdk';
+import type { Stream } from '@anthropic-ai/sdk/streaming';
+import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai';
+
+import { ChatStreamCallbacks } from '../../types';
+import {
+  StreamProtocolChunk,
+  StreamProtocolToolCallChunk,
+  StreamToolCallChunkData,
+} from './protocol';
+
+interface StreamStack {
+  id: string;
+}
+
+export const transformAnthropicStream = (
+  chunk: Anthropic.MessageStreamEvent,
+  stack: StreamStack,
+): StreamProtocolChunk => {
+  // maybe need another structure to add support for multiple choices
+  switch (chunk.type) {
+    case 'message_start': {
+      stack.id = chunk.message.id;
+      return { data: chunk.message, id: chunk.message.id, type: 'data' };
+    }
+
+    // case 'content_block_start': {
+    //   return { data: chunk.content_block.text, id: stack.id, type: 'data' };
+    // }
+
+    case 'content_block_delta': {
+      switch (chunk.delta.type as string) {
+        default:
+        case 'text_delta': {
+          return { data: chunk.delta.text, id: stack.id, type: 'text' };
+        }
+
+        // TODO: Anthropic doesn't support streaming tool calling yet, so we add
+        // this custom `tool_use` delta type to stream it ourselves; it may need
+        // updating once the feature ships upstream
+        case 'tool_use': {
+          const delta = (chunk.delta as any).tool_use as Anthropic.Beta.Tools.ToolUseBlock;
+
+          const toolCall: StreamToolCallChunkData = {
+            function: { arguments: JSON.stringify(delta.input), name: delta.name },
+            id: delta.id,
+            index: 0,
+            type: 'function',
+          };
+
+          return {
+            data: [toolCall],
+            id: stack.id,
+            type: 'tool_calls',
+          } as StreamProtocolToolCallChunk;
+        }
+      }
+    }
+
+    case 'message_delta': {
+      return { data: chunk.delta.stop_reason, id: stack.id, type: 'stop' };
+    }
+
+    case 'message_stop': {
+      return { data: 'message_stop', id: stack.id, type: 'stop' };
+    }
+
+    default: {
+      return { data: chunk, id: stack.id, type: 'data' };
+    }
+  }
+};
+
+const chatStreamable = async function* (stream: AsyncIterable<Anthropic.MessageStreamEvent>) {
+  for await (const response of stream) {
+    yield response;
+  }
+};
+
+export const AnthropicStream =
( + stream: Stream | ReadableStream, + callbacks?: ChatStreamCallbacks, +) => { + const streamStack: StreamStack = { id: '' }; + + const readableStream = + stream instanceof ReadableStream ? stream : readableFromAsyncIterable(chatStreamable(stream)); + + return readableStream + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformAnthropicStream(chunk, streamStack); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts index cf41e51d1747..46e36a7bdcf6 100644 --- a/src/libs/agent-runtime/utils/streams/google-ai.ts +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -9,7 +9,7 @@ import { nanoid } from '@/utils/uuid'; import { ChatStreamCallbacks } from '../../types'; import { StreamProtocolChunk, - StreamToolCallChunk, + StreamToolCallChunkData, chatStreamable, generateToolCallId, } from './protocol'; @@ -23,7 +23,7 @@ const transformGoogleGenerativeAIStream = ( if (functionCalls) { return { data: functionCalls.map( - (value, index): StreamToolCallChunk => ({ + (value, index): StreamToolCallChunkData => ({ function: { arguments: JSON.stringify(value.args), name: value.name, diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 47bec33c4374..c40d3e21a8e0 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,3 +1,4 @@ +export * from './anthropic'; export * from './google-ai'; export * from './openai'; export * from './minimax'; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 4b6bbb401170..3695c6505d7a 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -3,7 +3,12 @@ import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; import { ChatStreamCallbacks } from '../../types'; -import { StreamProtocolChunk, StreamToolCallChunk, generateToolCallId } from './protocol'; +import { + StreamProtocolChunk, + StreamProtocolToolCallChunk, + StreamToolCallChunkData, + generateToolCallId, +} from './protocol'; export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices @@ -16,7 +21,7 @@ export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): Stream if (item.delta?.tool_calls) { return { data: item.delta.tool_calls.map( - (value, index): StreamToolCallChunk => ({ + (value, index): StreamToolCallChunkData => ({ function: value.function, id: value.id || generateToolCallId(index, value.function?.name), @@ -33,7 +38,7 @@ export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): Stream ), id: chunk.id, type: 'tool_calls', - }; + } as StreamProtocolToolCallChunk; } // 给定结束原因 diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 8c5b21cd0c96..b4f74d64e932 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,12 +1,10 @@ -import OpenAI from 'openai'; - export interface StreamProtocolChunk { data: any; id: string; type: 'text' | 'tool_calls' | 
'data' | 'stop'; } -export interface StreamToolCallChunk { +export interface StreamToolCallChunkData { function?: { arguments?: string; name?: string | null; @@ -16,6 +14,13 @@ export interface StreamToolCallChunk { type: 'function' | string; } +export interface StreamProtocolToolCallChunk { + data: StreamToolCallChunkData[]; + id: string; + index: number; + type: 'tool_calls'; +} + export const generateToolCallId = (index: number, functionName?: string) => `${functionName || 'unknown_tool_call'}_${index}`; From bd0b3f7e038315fb2454576f5f7a2fef27cfc2cf Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Fri, 10 May 2024 07:47:33 +0000 Subject: [PATCH 11/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20ollama=20stream?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/ollama/index.ts | 6 +-- src/libs/agent-runtime/ollama/stream.ts | 31 ------------ .../agent-runtime/utils/streams/anthropic.ts | 5 +- src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/ollama.ts | 48 +++++++++++++++++++ .../agent-runtime/utils/streams/protocol.ts | 6 ++- 6 files changed, 58 insertions(+), 39 deletions(-) delete mode 100644 src/libs/agent-runtime/ollama/stream.ts create mode 100644 src/libs/agent-runtime/utils/streams/ollama.ts diff --git a/src/libs/agent-runtime/ollama/index.ts b/src/libs/agent-runtime/ollama/index.ts index b36ec48f9ba3..80a47ad48423 100644 --- a/src/libs/agent-runtime/ollama/index.ts +++ b/src/libs/agent-runtime/ollama/index.ts @@ -1,15 +1,15 @@ -import { StreamingTextResponse } from 'ai'; import { Ollama } from 'ollama/browser'; import { ClientOptions } from 'openai'; import { OpenAIChatMessage } from '@/libs/agent-runtime'; -import { OllamaStream } from '@/libs/agent-runtime/ollama/stream'; import { ChatModelCard } from '@/types/llm'; import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType } from '../error'; import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types'; import { AgentRuntimeError } from '../utils/createError'; +import { StreamingResponse } from '../utils/response'; +import { OllamaStream } from '../utils/streams'; import { parseDataUri } from '../utils/uriParser'; import { OllamaMessage } from './type'; @@ -51,7 +51,7 @@ export class LobeOllamaAI implements LobeRuntimeAI { stream: true, }); - return new StreamingTextResponse(OllamaStream(response, options?.callback), { + return StreamingResponse(OllamaStream(response, options?.callback), { headers: options?.headers, }); } catch (error) { diff --git a/src/libs/agent-runtime/ollama/stream.ts b/src/libs/agent-runtime/ollama/stream.ts deleted file mode 100644 index 02a019f04885..000000000000 --- a/src/libs/agent-runtime/ollama/stream.ts +++ /dev/null @@ -1,31 +0,0 @@ -// copy from https://github.com/vercel/ai/discussions/539#discussioncomment-8193721 -// and I have remove the unnecessary code -import { - type AIStreamCallbacksAndOptions, - createCallbacksTransformer, - createStreamDataTransformer, - readableFromAsyncIterable, -} from 'ai'; -import { ChatResponse } from 'ollama/browser'; - -// A modified version of the streamable function specifically for chat messages -const chatStreamable = async function* (stream: AsyncIterable) { - for await (const response of stream) { - if (response.message) { - yield response.message; - } - if (response.done) { - // Additional final response data can be handled here if necessary - return; - } - } -}; - -export const OllamaStream = ( - 
res: AsyncIterable, - cb?: AIStreamCallbacksAndOptions, -): ReadableStream => { - return readableFromAsyncIterable(chatStreamable(res)) - .pipeThrough(createCallbacksTransformer(cb) as any) - .pipeThrough(createStreamDataTransformer(cb?.experimental_streamData)); -}; diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index f35561b92f98..ff190340d12b 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -6,13 +6,10 @@ import { ChatStreamCallbacks } from '../../types'; import { StreamProtocolChunk, StreamProtocolToolCallChunk, + StreamStack, StreamToolCallChunkData, } from './protocol'; -interface StreamStack { - id: string; -} - export const transformAnthropicStream = ( chunk: Anthropic.MessageStreamEvent, stack: StreamStack, diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index c40d3e21a8e0..0a9277369426 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,5 +1,6 @@ export * from './anthropic'; export * from './google-ai'; +export * from './ollama'; export * from './openai'; export * from './minimax'; export * from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/ollama.ts b/src/libs/agent-runtime/utils/streams/ollama.ts new file mode 100644 index 000000000000..c0f5b4cc7c60 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/ollama.ts @@ -0,0 +1,48 @@ +// copy from https://github.com/vercel/ai/discussions/539#discussioncomment-8193721 +// and I have remove the unnecessary code +import { + type AIStreamCallbacksAndOptions, + createCallbacksTransformer, + readableFromAsyncIterable, +} from 'ai'; +import { ChatResponse } from 'ollama/browser'; + +import { nanoid } from '@/utils/uuid'; + +import { StreamProtocolChunk, StreamStack } from './protocol'; + +const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + if (chunk.done) { + return { data: 'finished', id: stack.id, type: 'stop' }; + } + + return { data: chunk.message.content, id: stack.id, type: 'text' }; +}; + +const chatStreamable = async function* (stream: AsyncIterable) { + for await (const response of stream) { + yield response; + } +}; + +export const OllamaStream = ( + res: AsyncIterable, + cb?: AIStreamCallbacksAndOptions, +): ReadableStream => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; + + return readableFromAsyncIterable(chatStreamable(res)) + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformOllamaStream(chunk, streamStack); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(cb) as any); +}; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index b4f74d64e932..2746d4f736b7 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,6 +1,10 @@ +export interface StreamStack { + id: string; +} + export interface StreamProtocolChunk { data: any; - id: string; + id?: string; type: 'text' | 'tool_calls' | 'data' | 'stop'; } From 24b6abbbd10cbcc38f51d0af2b8f0cc5b742507d Mon Sep 17 00:00:00 2001 From: 
arvinxx Date: Fri, 10 May 2024 21:26:08 +0800 Subject: [PATCH 12/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20the=20protocol=20stream=20transformer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../agent-runtime/utils/streams/anthropic.ts | 13 +--- .../agent-runtime/utils/streams/google-ai.ts | 63 +++---------------- .../agent-runtime/utils/streams/minimax.ts | 21 ++++--- .../agent-runtime/utils/streams/ollama.ts | 16 +---- .../agent-runtime/utils/streams/openai.ts | 13 +--- .../agent-runtime/utils/streams/protocol.ts | 14 +++++ 6 files changed, 41 insertions(+), 99 deletions(-) diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index ff190340d12b..d60f3362470e 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -8,6 +8,7 @@ import { StreamProtocolToolCallChunk, StreamStack, StreamToolCallChunkData, + createSSEProtocolTransformer, } from './protocol'; export const transformAnthropicStream = ( @@ -84,16 +85,6 @@ export const AnthropicStream = ( stream instanceof ReadableStream ? stream : readableFromAsyncIterable(chatStreamable(stream)); return readableStream - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformAnthropicStream(chunk, streamStack); - - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack)) .pipeThrough(createCallbacksTransformer(callbacks)); }; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts index 46e36a7bdcf6..92a6bfd10c8d 100644 --- a/src/libs/agent-runtime/utils/streams/google-ai.ts +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -9,13 +9,16 @@ import { nanoid } from '@/utils/uuid'; import { ChatStreamCallbacks } from '../../types'; import { StreamProtocolChunk, + StreamStack, StreamToolCallChunkData, chatStreamable, + createSSEProtocolTransformer, generateToolCallId, } from './protocol'; const transformGoogleGenerativeAIStream = ( chunk: EnhancedGenerateContentResponse, + stack: StreamStack, ): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices const functionCalls = chunk.functionCalls(); @@ -33,7 +36,7 @@ const transformGoogleGenerativeAIStream = ( type: 'function', }), ), - id: nanoid(), + id: stack.id, type: 'tool_calls', }; } @@ -41,48 +44,9 @@ const transformGoogleGenerativeAIStream = ( return { data: text, - id: nanoid(), + id: stack?.id, type: 'text', }; - // if (typeof item.delta?.content === 'string') { - // return { data: item.delta.content, id: chunk.id, type: 'text' }; - // } - // - // if (item.delta?.tool_calls) { - // return { - // data: item.delta.tool_calls.map((value, index) => ({ - // ...value, - // - // // mistral's tool calling don't have index and function field, it's data like: - // // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with 
a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] - // - // // minimax's tool calling don't have index field, it's data like: - // // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] - // - // // so we need to add these default values - // index: typeof value.index !== 'undefined' ? value.index : index, - // type: value.type || 'function', - // })), - // id: chunk.id, - // type: 'tool_calls', - // }; - // } - // - // // 给定结束原因 - // if (item.finish_reason) { - // return { data: item.finish_reason, id: chunk.id, type: 'stop' }; - // } - // - // if (item.delta.content === null) { - // return { data: item.delta, id: chunk.id, type: 'data' }; - // } - // - // // 其余情况下,返回 delta 和 index - // return { - // data: { delta: item.delta, id: chunk.id, index: item.index }, - // id: chunk.id, - // type: 'data', - // }; }; // only use for debug @@ -94,17 +58,10 @@ export const googleGenAIResultToStream = (stream: GenerateContentStreamResult) = export const GoogleGenerativeAIStream = ( rawStream: ReadableStream, callbacks?: ChatStreamCallbacks, -) => - rawStream - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformGoogleGenerativeAIStream(chunk); +) => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + return rawStream + .pipeThrough(createSSEProtocolTransformer(transformGoogleGenerativeAIStream, streamStack)) .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts index cb53534d6468..a517b9f794e4 100644 --- a/src/libs/agent-runtime/utils/streams/minimax.ts +++ b/src/libs/agent-runtime/utils/streams/minimax.ts @@ -3,6 +3,7 @@ import OpenAI from 'openai'; import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './openai'; +import { createSSEProtocolTransformer } from './protocol'; const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => { const decoder = new TextDecoder(); @@ -17,22 +18,22 @@ const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChu chunkValue = chunkValue.slice(5).trim(); } - return JSON.parse(chunkValue); + try { + return JSON.parse(chunkValue); + } catch (e) { + console.error('minimax chunk parse error:', e); + + return { raw: chunkValue } as any; + } }; export const MinimaxStream = (stream: ReadableStream, callbacks?: ChatStreamCallbacks) => { return stream .pipeThrough( - new TransformStream({ - transform: (buffer, controller) => { - const chunk = unit8ArrayToJSONChunk(buffer); - - const { type, id, data } = transformOpenAIStream(chunk); + createSSEProtocolTransformer((buffer) => { + const chunk = unit8ArrayToJSONChunk(buffer); - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, + return transformOpenAIStream(chunk); }), ) .pipeThrough(createCallbacksTransformer(callbacks)); diff --git a/src/libs/agent-runtime/utils/streams/ollama.ts b/src/libs/agent-runtime/utils/streams/ollama.ts index c0f5b4cc7c60..728b78dedff2 100644 --- 
a/src/libs/agent-runtime/utils/streams/ollama.ts +++ b/src/libs/agent-runtime/utils/streams/ollama.ts @@ -1,5 +1,3 @@ -// copy from https://github.com/vercel/ai/discussions/539#discussioncomment-8193721 -// and I have remove the unnecessary code import { type AIStreamCallbacksAndOptions, createCallbacksTransformer, @@ -9,7 +7,7 @@ import { ChatResponse } from 'ollama/browser'; import { nanoid } from '@/utils/uuid'; -import { StreamProtocolChunk, StreamStack } from './protocol'; +import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from './protocol'; const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices @@ -33,16 +31,6 @@ export const OllamaStream = ( const streamStack: StreamStack = { id: 'chat_' + nanoid() }; return readableFromAsyncIterable(chatStreamable(res)) - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformOllamaStream(chunk, streamStack); - - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + .pipeThrough(createSSEProtocolTransformer(transformOllamaStream, streamStack)) .pipeThrough(createCallbacksTransformer(cb) as any); }; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 3695c6505d7a..908c6575a6fd 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -7,6 +7,7 @@ import { StreamProtocolChunk, StreamProtocolToolCallChunk, StreamToolCallChunkData, + createSSEProtocolTransformer, generateToolCallId, } from './protocol'; @@ -72,16 +73,6 @@ export const OpenAIStream = ( stream instanceof ReadableStream ? 
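// For reference, each StreamProtocolChunk is serialized by the shared
// createSSEProtocolTransformer into a three-line SSE frame. A text delta of
// "hello" on an illustrative message id chat_abc goes over the wire as:
//   id: chat_abc
//   event: text
//   data: "hello"
// followed by a blank line, the standard SSE event delimiter.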
stream : readableFromAsyncIterable(chatStreamable(stream)); return readableStream - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformOpenAIStream(chunk); - - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + .pipeThrough(createSSEProtocolTransformer(transformOpenAIStream)) .pipeThrough(createCallbacksTransformer(callbacks)); }; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 2746d4f736b7..408220fbb77d 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -33,3 +33,17 @@ export const chatStreamable = async function* (stream: AsyncIterable) { yield response; } }; + +export const createSSEProtocolTransformer = ( + transformer: (chunk: any, stack: StreamStack) => StreamProtocolChunk, + streamStack?: StreamStack, +) => + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformer(chunk, streamStack || { id: '' }); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }); From 83cf6ecee95d8b26343e544f90a40064f662104a Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 21:39:55 +0800 Subject: [PATCH 13/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20the=20bedrock=20stream?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/anthropic/index.ts | 10 +--- src/libs/agent-runtime/bedrock/index.ts | 42 +++++++++-------- .../agent-runtime/utils/anthropicHelpers.ts | 10 ++++ .../utils/streams/bedrock/claude.ts | 21 +++++++++ .../utils/streams/bedrock/common.ts | 32 +++++++++++++ .../utils/streams/bedrock/index.ts | 3 ++ .../utils/streams/bedrock/llama.ts | 46 +++++++++++++++++++ src/libs/agent-runtime/utils/streams/index.ts | 1 + 8 files changed, 139 insertions(+), 26 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/claude.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/common.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/index.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/llama.ts diff --git a/src/libs/agent-runtime/anthropic/index.ts b/src/libs/agent-runtime/anthropic/index.ts index 936ab9b2a9eb..d4a969ffda67 100644 --- a/src/libs/agent-runtime/anthropic/index.ts +++ b/src/libs/agent-runtime/anthropic/index.ts @@ -9,7 +9,7 @@ import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../typ import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; import { desensitizeUrl } from '../utils/desensitizeUrl'; -import { buildAnthropicMessages } from '../utils/anthropicHelpers'; +import { buildAnthropicMessages, buildAnthropicTools } from '../utils/anthropicHelpers'; import { StreamingResponse } from '../utils/response'; import { AnthropicStream } from '../utils/streams'; @@ -120,13 +120,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI { temperature, // TODO: Anthropic sdk don't have tools interface currently // @ts-ignore - tools: tools?.map( - (tool): Anthropic.Beta.Tools.Tool => ({ - description: tool.function.description, - input_schema: tool.function.parameters as Anthropic.Beta.Tools.Tool.InputSchema, - name: 
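// The buildAnthropicTools helper introduced here keeps the exact shape of the
// inline mapping it replaces: an OpenAI tool definition such as
//   { type: 'function', function: { name: 'search', description: '...', parameters: { type: 'object' } } }
// (an illustrative example) becomes the Anthropic tool
//   { name: 'search', description: '...', input_schema: { type: 'object' } }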
tool.function.name, - }), - ), + tools: buildAnthropicTools(tools), top_p, } satisfies Anthropic.MessageCreateParams; } diff --git a/src/libs/agent-runtime/bedrock/index.ts b/src/libs/agent-runtime/bedrock/index.ts index 1f1a88ca17d0..ff940b0bd44e 100644 --- a/src/libs/agent-runtime/bedrock/index.ts +++ b/src/libs/agent-runtime/bedrock/index.ts @@ -2,7 +2,6 @@ import { BedrockRuntimeClient, InvokeModelWithResponseStreamCommand, } from '@aws-sdk/client-bedrock-runtime'; -import { AWSBedrockLlama2Stream, AWSBedrockStream, StreamingTextResponse } from 'ai'; import { experimental_buildLlama2Prompt } from 'ai/prompts'; import { LobeRuntimeAI } from '../BaseAI'; @@ -11,6 +10,12 @@ import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../typ import { buildAnthropicMessages } from '../utils/anthropicHelpers'; import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { + AWSBedrockClaudeStream, + AWSBedrockLlamaStream, + createBedrockStream, +} from '../utils/streams'; export interface LobeBedrockAIParams { accessKeyId?: string; @@ -39,7 +44,7 @@ export class LobeBedrockAI implements LobeRuntimeAI { } async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) { - if (payload.model.startsWith('meta')) return this.invokeLlamaModel(payload); + if (payload.model.startsWith('meta')) return this.invokeLlamaModel(payload, options); return this.invokeClaudeModel(payload, options); } @@ -47,7 +52,7 @@ export class LobeBedrockAI implements LobeRuntimeAI { private invokeClaudeModel = async ( payload: ChatStreamPayload, options?: ChatCompetitionOptions, - ): Promise<StreamingTextResponse> => { + ): Promise<Response> => { const { max_tokens, messages, model, temperature, top_p } = payload; const system_message = messages.find((m) => m.role === 'system'); const user_messages = messages.filter((m) => m.role !== 'system'); @@ -68,23 +73,20 @@ export class LobeBedrockAI implements LobeRuntimeAI { try { // Ask Claude for a streaming chat completion given the prompt - const bedrockResponse = await this.client.send(command, { abortSignal: options?.signal }); + const res = await this.client.send(command, { abortSignal: options?.signal }); - // Convert the response into a friendly text-stream - const stream = AWSBedrockStream( - bedrockResponse, - options?.callback, - (chunk) => chunk.delta?.text, - ); + const claudeStream = createBedrockStream(res); - const [debug, output] = stream.tee(); + const [prod, debug] = claudeStream.tee(); if (process.env.DEBUG_BEDROCK_CHAT_COMPLETION === '1') { debugStream(debug).catch(console.error); } // Respond with the stream - return new StreamingTextResponse(output); + return StreamingResponse(AWSBedrockClaudeStream(prod, options?.callback), { + headers: options?.headers, + }); } catch (e) { const err = e as Error & { $metadata: any }; @@ -101,7 +103,10 @@ export class LobeBedrockAI implements LobeRuntimeAI { } }; - private invokeLlamaModel = async (payload: ChatStreamPayload): Promise<StreamingTextResponse> => { + private invokeLlamaModel = async ( + payload: ChatStreamPayload, + options?: ChatCompetitionOptions, + ): Promise<Response> => { const { max_tokens, messages, model } = payload; const command = new InvokeModelWithResponseStreamCommand({ accept: 'application/json', @@ -115,18 +120,19 @@ export class LobeBedrockAI implements LobeRuntimeAI { try { // Ask the Llama model for a streaming chat completion given the prompt - const bedrockResponse = await
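// Note the tee() pattern used by both invocations: the Bedrock response stream
// is split into a `prod` branch, which is piped through the protocol transformer
// and returned to the caller, and a `debug` branch, which is only drained when
// DEBUG_BEDROCK_CHAT_COMPLETION=1, so debug logging never consumes the stream
// the client sees.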
this.client.send(command); - // Convert the response into a friendly text-stream - const stream = AWSBedrockLlama2Stream(bedrockResponse); + const stream = createBedrockStream(res); - const [debug, output] = stream.tee(); + const [prod, debug] = stream.tee(); if (process.env.DEBUG_BEDROCK_CHAT_COMPLETION === '1') { debugStream(debug).catch(console.error); } // Respond with the stream - return new StreamingTextResponse(output); + return StreamingResponse(AWSBedrockLlamaStream(prod, options?.callback), { + headers: options?.headers, + }); } catch (e) { const err = e as Error & { $metadata: any }; diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.ts b/src/libs/agent-runtime/utils/anthropicHelpers.ts index 02779fddd05d..1617622f38e1 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.ts @@ -1,4 +1,5 @@ import Anthropic from '@anthropic-ai/sdk'; +import OpenAI from 'openai'; import { OpenAIChatMessage, UserMessageContentPart } from '../types'; import { parseDataUri } from './uriParser'; @@ -108,3 +109,12 @@ export const buildAnthropicMessages = ( return messages; }; + +export const buildAnthropicTools = (tools?: OpenAI.ChatCompletionTool[]) => + tools?.map( + (tool): Anthropic.Beta.Tools.Tool => ({ + description: tool.function.description, + input_schema: tool.function.parameters as Anthropic.Beta.Tools.Tool.InputSchema, + name: tool.function.name, + }), + ); diff --git a/src/libs/agent-runtime/utils/streams/bedrock/claude.ts b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts new file mode 100644 index 000000000000..bfd2945b8329 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts @@ -0,0 +1,21 @@ +import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; +import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; + +import { nanoid } from '@/utils/uuid'; + +import { transformAnthropicStream } from '../anthropic'; +import { StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { createBedrockStream } from './common'; + +export const AWSBedrockClaudeStream = ( + res: InvokeModelWithResponseStreamResponse | ReadableStream, + cb?: AIStreamCallbacksAndOptions, +): ReadableStream => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; + + const stream = res instanceof ReadableStream ? 
res : createBedrockStream(res); + + return stream + .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack)) + .pipeThrough(createCallbacksTransformer(cb) as any); +}; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/common.ts new file mode 100644 index 000000000000..dbc93b7637c3 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/common.ts @@ -0,0 +1,32 @@ +import { + InvokeModelWithResponseStreamResponse, + ResponseStream, +} from '@aws-sdk/client-bedrock-runtime'; +import { readableFromAsyncIterable } from 'ai'; + +const chatStreamable = async function* (stream: AsyncIterable<ResponseStream>) { + for await (const response of stream) { + if (response.chunk) { + const decoder = new TextDecoder(); + + const value = decoder.decode(response.chunk.bytes, { stream: true }); + try { + const chunk = JSON.parse(value); + + yield chunk; + } catch (e) { + console.error('bedrock stream parser error:', e); + + yield value; + } + } else { + yield response; + } + } +}; + +/** + * convert the Bedrock response to a readable stream + */ +export const createBedrockStream = (res: InvokeModelWithResponseStreamResponse) => + readableFromAsyncIterable(chatStreamable(res.body!)); diff --git a/src/libs/agent-runtime/utils/streams/bedrock/index.ts new file mode 100644 index 000000000000..a25c3d7a6b43 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/index.ts @@ -0,0 +1,3 @@ +export * from './claude'; +export * from './common'; +export * from './llama'; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/llama.ts new file mode 100644 index 000000000000..adec93f736a5 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/llama.ts @@ -0,0 +1,46 @@ +import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; +import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; + +import { nanoid } from '@/utils/uuid'; + +import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { createBedrockStream } from './common'; + +interface AmazonBedrockInvocationMetrics { + firstByteLatency: number; + inputTokenCount: number; + invocationLatency: number; + outputTokenCount: number; +} +interface BedrockLlamaStreamChunk { + 'amazon-bedrock-invocationMetrics'?: AmazonBedrockInvocationMetrics; + 'generation': string; + 'generation_token_count': number; + 'prompt_token_count'?: number | null; + 'stop_reason'?: null | 'stop' | string; +} + +export const transformLlamaStream = ( + chunk: BedrockLlamaStreamChunk, + stack: StreamStack, +): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + if (chunk.stop_reason) { + return { data: 'finished', id: stack.id, type: 'stop' }; + } + + return { data: chunk.generation, id: stack.id, type: 'text' }; +}; + +export const AWSBedrockLlamaStream = ( + res: InvokeModelWithResponseStreamResponse | ReadableStream, + cb?: AIStreamCallbacksAndOptions, +): ReadableStream => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; + + const stream = res instanceof ReadableStream ? 
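// A worked example for transformLlamaStream (payloads are illustrative): a decoded
// chunk { generation: 'Hello', generation_token_count: 1, stop_reason: null }
// maps to { data: 'Hello', id: 'chat_<nanoid>', type: 'text' }, while a final
// chunk carrying stop_reason: 'stop' maps to
// { data: 'finished', id: 'chat_<nanoid>', type: 'stop' }.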
res : createBedrockStream(res); + + return stream + .pipeThrough(createSSEProtocolTransformer(transformLlamaStream, streamStack)) + .pipeThrough(createCallbacksTransformer(cb) as any); +}; diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 0a9277369426..6601b493fce8 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,4 +1,5 @@ export * from './anthropic'; +export * from './bedrock'; export * from './google-ai'; export * from './ollama'; export * from './openai'; From fb59d4f7faf0bc091a79539a9241990681c88dcd Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 21:48:53 +0800 Subject: [PATCH 14/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20remove?= =?UTF-8?q?=20Stream=20Text=20Response?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/BaseAI.ts | 11 ++--------- src/libs/agent-runtime/utils/streams/index.ts | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/libs/agent-runtime/BaseAI.ts b/src/libs/agent-runtime/BaseAI.ts index 44ba7e20d16d..301dc6523ba8 100644 --- a/src/libs/agent-runtime/BaseAI.ts +++ b/src/libs/agent-runtime/BaseAI.ts @@ -1,4 +1,3 @@ -import { StreamingTextResponse } from 'ai'; import OpenAI from 'openai'; import { ChatModelCard } from '@/types/llm'; @@ -7,10 +6,7 @@ import { ChatCompetitionOptions, ChatStreamPayload } from './types'; export interface LobeRuntimeAI { baseURL?: string; - chat( - payload: ChatStreamPayload, - options?: ChatCompetitionOptions, - ): Promise; + chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions): Promise; models?(): Promise; } @@ -19,10 +15,7 @@ export abstract class LobeOpenAICompatibleRuntime { abstract baseURL: string; abstract client: OpenAI; - abstract chat( - payload: ChatStreamPayload, - options?: ChatCompetitionOptions, - ): Promise; + abstract chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions): Promise; abstract models(): Promise; } diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 6601b493fce8..deb4a934c541 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,7 +1,7 @@ export * from './anthropic'; export * from './bedrock'; export * from './google-ai'; +export * from './minimax'; export * from './ollama'; export * from './openai'; -export * from './minimax'; export * from './protocol'; From dc4d0c2cac125c9c85a2b8b09a7a4b136c9b4346 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 22:38:30 +0800 Subject: [PATCH 15/24] =?UTF-8?q?=E2=9C=85=20test:=20clean=20some=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/groq/index.test.ts | 74 +--- src/libs/agent-runtime/minimax/index.test.ts | 4 +- src/libs/agent-runtime/mistral/index.test.ts | 53 --- src/libs/agent-runtime/moonshot/index.test.ts | 72 +--- src/libs/agent-runtime/openai/index.test.ts | 53 --- .../agent-runtime/openrouter/index.test.ts | 54 +-- .../agent-runtime/perplexity/index.test.ts | 71 ---- src/libs/agent-runtime/perplexity/index.ts | 5 +- .../agent-runtime/togetherai/index.test.ts | 54 +-- .../utils/anthropicHelpers.test.ts | 2 +- .../openaiCompatibleFactory/index.test.ts | 382 ++++++++++++++++++ .../utils/openaiCompatibleFactory/index.ts | 5 +- src/libs/agent-runtime/zeroone/index.test.ts | 54 +-- 
src/libs/agent-runtime/zhipu/index.test.ts | 2 +- 14 files changed, 398 insertions(+), 487 deletions(-) create mode 100644 src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts diff --git a/src/libs/agent-runtime/groq/index.test.ts b/src/libs/agent-runtime/groq/index.test.ts index 67779be2eb26..e8363720fdb0 100644 --- a/src/libs/agent-runtime/groq/index.test.ts +++ b/src/libs/agent-runtime/groq/index.test.ts @@ -40,25 +40,7 @@ describe('LobeGroqAI', () => { }); describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - - it('should call OpenRouter API with corresponding options', async () => { + it('should call chat with corresponding options', async () => { // Arrange const mockStream = new ReadableStream(); const mockResponse = Promise.resolve(mockStream); @@ -78,6 +60,7 @@ describe('LobeGroqAI', () => { expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( { max_tokens: 1024, + stream: true, messages: [{ content: 'Hello', role: 'user' }], model: 'mistralai/mistral-7b-instruct:free', temperature: 0.7, @@ -253,59 +236,6 @@ describe('LobeGroqAI', () => { }); }); - describe('LobeGroqAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'mistralai/mistral-7b-instruct:free', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_OPENROUTER_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/minimax/index.test.ts b/src/libs/agent-runtime/minimax/index.test.ts index 49c9ac5c3edd..efef06500edf 100644 --- a/src/libs/agent-runtime/minimax/index.test.ts +++ b/src/libs/agent-runtime/minimax/index.test.ts @@ -62,7 +62,7 @@ describe('LobeMinimaxAI', () => { temperature: 0, }); - expect(result).toBeInstanceOf(StreamingTextResponse); + 
expect(result).toBeInstanceOf(Response); }); it('should handle text messages correctly', async () => { @@ -85,7 +85,7 @@ describe('LobeMinimaxAI', () => { temperature: 0, }); - expect(result).toBeInstanceOf(StreamingTextResponse); + expect(result).toBeInstanceOf(Response); }); it('should call debugStream in DEBUG mode', async () => { diff --git a/src/libs/agent-runtime/mistral/index.test.ts b/src/libs/agent-runtime/mistral/index.test.ts index 1250d69ddc8b..7cb12c068fa8 100644 --- a/src/libs/agent-runtime/mistral/index.test.ts +++ b/src/libs/agent-runtime/mistral/index.test.ts @@ -287,59 +287,6 @@ describe('LobeMistralAI', () => { }); }); - describe('LobeMistralAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'open-mistral-7b', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_MISTRAL_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/moonshot/index.test.ts b/src/libs/agent-runtime/moonshot/index.test.ts index 16c0e69e319d..6de7a29cb2c8 100644 --- a/src/libs/agent-runtime/moonshot/index.test.ts +++ b/src/libs/agent-runtime/moonshot/index.test.ts @@ -40,24 +40,6 @@ describe('LobeMoonshotAI', () => { }); describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - describe('Error', () => { it('should return OpenAIBizError with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange @@ -223,59 +205,6 @@ describe('LobeMoonshotAI', () => { }); }); - describe('LobeMoonshotAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) 
{ - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'gpt-3.5-turbo-0125', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_MOONSHOT_CHAT_COMPLETION is 1', async () => { // Arrange @@ -306,6 +235,7 @@ describe('LobeMoonshotAI', () => { await instance.chat({ messages: [{ content: 'Hello', role: 'user' }], model: 'text-davinci-003', + stream: true, temperature: 0, }); diff --git a/src/libs/agent-runtime/openai/index.test.ts b/src/libs/agent-runtime/openai/index.test.ts index 6a4e8c9e508c..ef1591128314 100644 --- a/src/libs/agent-runtime/openai/index.test.ts +++ b/src/libs/agent-runtime/openai/index.test.ts @@ -190,59 +190,6 @@ describe('LobeOpenAI', () => { }); }); - describe('LobeOpenAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'gpt-3.5-turbo-0125', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_OPENAI_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/openrouter/index.test.ts b/src/libs/agent-runtime/openrouter/index.test.ts index 306c9126af4c..96752c349b43 100644 --- a/src/libs/agent-runtime/openrouter/index.test.ts +++ b/src/libs/agent-runtime/openrouter/index.test.ts @@ -81,6 +81,7 @@ describe('LobeOpenRouterAI', () => 
{ { max_tokens: 1024, messages: [{ content: 'Hello', role: 'user' }], + stream: true, model: 'mistralai/mistral-7b-instruct:free', temperature: 0.7, top_p: 1, @@ -255,59 +256,6 @@ describe('LobeOpenRouterAI', () => { }); }); - describe('LobeOpenRouterAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'mistralai/mistral-7b-instruct:free', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_OPENROUTER_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/perplexity/index.test.ts b/src/libs/agent-runtime/perplexity/index.test.ts index 07e29fa5eaa3..6a7d0e8d99e6 100644 --- a/src/libs/agent-runtime/perplexity/index.test.ts +++ b/src/libs/agent-runtime/perplexity/index.test.ts @@ -40,24 +40,6 @@ describe('LobePerplexityAI', () => { }); describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - describe('Error', () => { it('should return OpenAIBizError with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange @@ -223,59 +205,6 @@ describe('LobePerplexityAI', () => { }); }); - describe('LobePerplexityAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'gpt-3.5-turbo-0125', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 
callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_PERPLEXITY_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/perplexity/index.ts b/src/libs/agent-runtime/perplexity/index.ts index 302618f8096e..446e4389ad43 100644 --- a/src/libs/agent-runtime/perplexity/index.ts +++ b/src/libs/agent-runtime/perplexity/index.ts @@ -9,7 +9,7 @@ export const LobePerplexityAI = LobeOpenAICompatibleFactory({ chatCompletion: { handlePayload: (payload: ChatStreamPayload) => { // Set a default frequency penalty value greater than 0 - const { presence_penalty, frequency_penalty, ...res } = payload; + const { presence_penalty, frequency_penalty, stream = true, ...res } = payload; let param; @@ -22,8 +22,7 @@ export const LobePerplexityAI = LobeOpenAICompatibleFactory({ param = { frequency_penalty: frequency_penalty || defaultFrequencyPenalty }; } - console.log(param); - return { ...res, ...param } as OpenAI.ChatCompletionCreateParamsStreaming; + return { ...res, ...param, stream } as OpenAI.ChatCompletionCreateParamsStreaming; }, }, debug: { diff --git a/src/libs/agent-runtime/togetherai/index.test.ts b/src/libs/agent-runtime/togetherai/index.test.ts index 80e22bc13487..e13d62375ea2 100644 --- a/src/libs/agent-runtime/togetherai/index.test.ts +++ b/src/libs/agent-runtime/togetherai/index.test.ts @@ -81,6 +81,7 @@ describe('LobeTogetherAI', () => { messages: [{ content: 'Hello', role: 'user' }], model: 'mistralai/mistral-7b-instruct:free', temperature: 0.7, + stream: true, top_p: 1, }, { headers: { Accept: '*/*' } }, @@ -253,59 +254,6 @@ describe('LobeTogetherAI', () => { }); }); - describe('LobeTogetherAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'mistralai/mistral-7b-instruct:free', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - 
expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_TOGETHERAI_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts index 8cf8d7f5e615..8f15e880c8c9 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts @@ -49,7 +49,7 @@ describe('anthropicHelpers', () => { role: 'system', }; const result = buildAnthropicMessage(message); - expect(result).toEqual({ content: [{ type: 'text', text: 'Hello!' }], role: 'assistant' }); + expect(result).toEqual({ content: [{ type: 'text', text: 'Hello!' }], role: 'user' }); }); }); diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts new file mode 100644 index 000000000000..621bd94f8583 --- /dev/null +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts @@ -0,0 +1,382 @@ +// @vitest-environment node +import OpenAI from 'openai'; +import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + AgentRuntimeErrorType, + ChatStreamCallbacks, + LobeOpenAICompatibleRuntime, + ModelProvider, +} from '@/libs/agent-runtime'; + +import * as debugStreamModule from '../debugStream'; +import { LobeOpenAICompatibleFactory } from './index'; + +const provider = 'groq'; +const defaultBaseURL = 'https://api.groq.com/openai/v1'; +const bizErrorType = 'GroqBizError'; +const invalidErrorType = 'InvalidGroqAPIKey'; + +// Mock the console.error to avoid polluting test output +vi.spyOn(console, 'error').mockImplementation(() => {}); + +let instance: LobeOpenAICompatibleRuntime; + +const LobeMockProvider = LobeOpenAICompatibleFactory({ + baseURL: defaultBaseURL, + chatCompletion: { + handleError: (error) => { + // 403 means the location is not supported + if (error.status === 403) + return { error, errorType: AgentRuntimeErrorType.LocationNotSupportError }; + }, + handlePayload: (payload) => { + return { + ...payload, + // disable stream for tools, since Groq does not support streaming with tools yet + stream: !payload.tools, + } as any; + }, + }, + debug: { + chatCompletion: () => process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION === '1', + }, + errorType: { + bizError: AgentRuntimeErrorType.GroqBizError, + invalidAPIKey: AgentRuntimeErrorType.InvalidGroqAPIKey, + }, + provider: ModelProvider.Groq, +}); + +beforeEach(() => { + instance = new LobeMockProvider({ apiKey: 'test' }); + + // use vi.spyOn to mock the chat.completions.create method + vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue( + new ReadableStream() as any, + ); +}); + +afterEach(() => { + vi.clearAllMocks(); +}); + +describe('LobeOpenAICompatibleFactory', () => { + describe('init', () => { + it('should correctly initialize with an API key', async () => { + const instance = new LobeMockProvider({ apiKey: 'test_api_key' }); + expect(instance).toBeInstanceOf(LobeMockProvider); + expect(instance.baseURL).toEqual(defaultBaseURL); + }); + }); + + describe('chat', () => { + it('should return a Response on successful API call', async () => { + // Arrange + const mockStream = new ReadableStream();
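// With the handlePayload above, a payload that carries `tools` is sent with
// stream: false while a plain chat payload keeps stream: true, e.g. (searchTool
// is a hypothetical tool definition):
//   handlePayload({ messages, model, tools: [searchTool] }) // -> { ..., stream: false }
//   handlePayload({ messages, model })                      // -> { ..., stream: true }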
const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + + // Assert + expect(result).toBeInstanceOf(Response); + }); + + it('should call chat API with corresponding options', async () => { + // Arrange + const mockStream = new ReadableStream(); + const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0.7, + top_p: 1, + }); + + // Assert + expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( + { + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0.7, + stream: true, + top_p: 1, + }, + { headers: { Accept: '*/*' } }, + ); + expect(result).toBeInstanceOf(Response); + }); + + describe('Error', () => { + it('should return bizErrorType with an openai error response when OpenAI.APIError is thrown', async () => { + // Arrange + const apiError = new OpenAI.APIError( + 400, + { + status: 400, + error: { + message: 'Bad Request', + }, + }, + 'Error message', + {}, + ); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: { + error: { message: 'Bad Request' }, + status: 400, + }, + errorType: bizErrorType, + provider, + }); + } + }); + + it('should throw AgentRuntimeError with invalidErrorType if no apiKey is provided', async () => { + try { + new LobeMockProvider({}); + } catch (e) { + expect(e).toEqual({ errorType: invalidErrorType }); + } + }); + + it('should return bizErrorType with the cause when OpenAI.APIError is thrown with cause', async () => { + // Arrange + const errorInfo = { + stack: 'abc', + cause: { + message: 'api is undefined', + }, + }; + const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: { + cause: { message: 'api is undefined' }, + stack: 'abc', + }, + errorType: bizErrorType, + provider, + }); + } + }); + + it('should return bizErrorType with a cause response with a desensitized URL', async () => { + // Arrange + const errorInfo = { + stack: 'abc', + cause: { message: 'api is undefined' }, + }; + const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); + + instance = new LobeMockProvider({ + apiKey: 'test', + + baseURL: 'https://api.abc.com/v1', + }); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ 
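// The masked endpoint asserted below is produced by the desensitizeUrl util: a
// custom baseURL such as 'https://api.abc.com/v1' is reported in the error
// payload as 'https://api.***.com/v1', so user-configured hosts are not leaked.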
endpoint: 'https://api.***.com/v1', + error: { + cause: { message: 'api is undefined' }, + stack: 'abc', + }, + errorType: bizErrorType, + provider, + }); + } + }); + + it('should throw an invalid API key error type on 401 status code', async () => { + // Mock the API call to simulate a 401 error + const error = new Error('Unauthorized') as any; + error.status = 401; + vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + // Expect the chat method to throw an error with the configured invalidErrorType + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: new Error('Unauthorized'), + errorType: invalidErrorType, + provider, + }); + } + }); + + it('should return AgentRuntimeError for non-OpenAI errors', async () => { + // Arrange + const genericError = new Error('Generic Error'); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + errorType: 'AgentRuntimeError', + provider, + error: { + name: genericError.name, + cause: genericError.cause, + message: genericError.message, + stack: genericError.stack, + }, + }); + } + }); + }); + + describe('chat with callback and headers', () => { + it('should handle callback and headers correctly', async () => { + // mock chat.completions.create to return a readable stream + const mockCreateMethod = vi + .spyOn(instance['client'].chat.completions, 'create') + .mockResolvedValue( + new ReadableStream({ + start(controller) { + controller.enqueue({ + id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', + object: 'chat.completion.chunk', + created: 1709125675, + model: 'mistralai/mistral-7b-instruct:free', + system_fingerprint: 'fp_86156a94a0', + choices: [ + { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, + ], + }); + controller.close(); + }, + }) as any, + ); + + // prepare the callback and headers + const mockCallback: ChatStreamCallbacks = { + onStart: vi.fn(), + onToken: vi.fn(), + }; + const mockHeaders = { 'Custom-Header': 'TestValue' }; + + // run the test + const result = await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }, + { callback: mockCallback, headers: mockHeaders }, + ); + + // verify the callback was invoked + await result.text(); // ensure the stream is consumed + expect(mockCallback.onStart).toHaveBeenCalled(); + expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); + + // verify the headers were passed through correctly + expect(result.headers.get('Custom-Header')).toEqual('TestValue'); + + // clean up + mockCreateMethod.mockRestore(); + }); + }); + + describe('DEBUG', () => { + it('should call debugStream when DEBUG_MOCKPROVIDER_CHAT_COMPLETION is 1', async () => { + // Arrange + const mockProdStream = new ReadableStream() as any; // mocked prod stream + const mockDebugStream = new ReadableStream({ + start(controller) { + controller.enqueue('Debug stream content'); + controller.close(); + }, + }) as any; + mockDebugStream.toReadableStream = () => mockDebugStream; // add a toReadableStream method + + // mock the chat.completions.create return value, including a mocked tee method + (instance['client'].chat.completions.create as Mock).mockResolvedValue({ + tee: () => [mockProdStream, { 
toReadableStream: () => mockDebugStream }], + }); + + // save the original env var value + const originalDebugValue = process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION; + + // mock the env var + process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION = '1'; + vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve()); + + // run the test + // run the function under test and make sure it calls debugStream when the condition is met + // this hypothetical call may need adjusting to the actual setup + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + + // verify debugStream was called + expect(debugStreamModule.debugStream).toHaveBeenCalled(); + + // restore the original env var value + process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION = originalDebugValue; + }); + }); + }); +}); diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts index dda87d6ffab3..ece97dbf9834 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts @@ -73,7 +73,10 @@ export const LobeOpenAICompatibleFactory = ({ try { const postPayload = chatCompletion?.handlePayload ? chatCompletion.handlePayload(payload) - : (payload as unknown as OpenAI.ChatCompletionCreateParamsStreaming); + : ({ + ...payload, + stream: payload.stream ?? true, + } as OpenAI.ChatCompletionCreateParamsStreaming); const response = await this.client.chat.completions.create(postPayload, { // https://github.com/lobehub/lobe-chat/pull/318 diff --git a/src/libs/agent-runtime/zeroone/index.test.ts b/src/libs/agent-runtime/zeroone/index.test.ts index 896e2bc3174e..7089b9a7cddb 100644 --- a/src/libs/agent-runtime/zeroone/index.test.ts +++ b/src/libs/agent-runtime/zeroone/index.test.ts @@ -81,6 +81,7 @@ describe('LobeZeroOneAI', () => { messages: [{ content: 'Hello', role: 'user' }], model: 'yi-34b-chat-0205', temperature: 0.7, + stream: true, top_p: 1, }, { headers: { Accept: '*/*' } }, @@ -253,59 +254,6 @@ describe('LobeZeroOneAI', () => { }); }); - describe('LobeZeroOneAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'yi-34b-chat-0205', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_ZEROONE_CHAT_COMPLETION is 1', async () => { // 
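// (When no handlePayload is configured, the factory change in this patch falls
// back to { ...payload, stream: payload.stream ?? true }, so callers that never
// set `stream` still get a streaming completion while an explicit
// stream: false is preserved.)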
Arrange diff --git a/src/libs/agent-runtime/zhipu/index.test.ts b/src/libs/agent-runtime/zhipu/index.test.ts index b385085d1e00..5edd8fcb1a3a 100644 --- a/src/libs/agent-runtime/zhipu/index.test.ts +++ b/src/libs/agent-runtime/zhipu/index.test.ts @@ -61,7 +61,7 @@ describe('LobeZhipuAI', () => { model: 'glm-4', temperature: 0, }); - expect(result).toBeInstanceOf(StreamingTextResponse); + expect(result).toBeInstanceOf(Response); }); it('should handle callback and headers correctly', async () => { From 6c07cf943ddf18724f58f26a54f5a3e2edd75bd1 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 05:54:49 +0000 Subject: [PATCH 16/24] =?UTF-8?q?=F0=9F=97=83=EF=B8=8F=20refactor:=20impor?= =?UTF-8?q?t=20and=20export=20tool=20migration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../client/models/__tests__/message.test.ts | 2 +- src/database/client/schemas/message.ts | 3 +- src/features/Conversation/Actions/index.ts | 2 - .../fixtures/from-v1-to-v5-output.json | 245 ++++++++++++++++++ .../fixtures/function-input-v4.json | 96 +++++++ .../fixtures/function-output-v5.json | 120 +++++++++ src/migrations/FromV4ToV5/index.ts | 58 +++++ src/migrations/FromV4ToV5/migrations.test.ts | 49 ++++ src/migrations/FromV4ToV5/types/v4.ts | 21 ++ src/migrations/FromV4ToV5/types/v5.ts | 27 ++ src/migrations/index.ts | 9 +- src/services/chat.ts | 8 +- src/store/chat/slices/message/action.ts | 2 +- src/store/chat/slices/share/action.test.ts | 4 +- src/store/chat/slices/share/action.ts | 2 +- src/types/llm.ts | 2 +- src/types/message/index.ts | 9 +- 17 files changed, 635 insertions(+), 24 deletions(-) create mode 100644 src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json create mode 100644 src/migrations/FromV4ToV5/fixtures/function-input-v4.json create mode 100644 src/migrations/FromV4ToV5/fixtures/function-output-v5.json create mode 100644 src/migrations/FromV4ToV5/index.ts create mode 100644 src/migrations/FromV4ToV5/migrations.test.ts create mode 100644 src/migrations/FromV4ToV5/types/v4.ts create mode 100644 src/migrations/FromV4ToV5/types/v5.ts diff --git a/src/database/client/models/__tests__/message.test.ts b/src/database/client/models/__tests__/message.test.ts index 4c6402495a71..26687e7c6b0a 100644 --- a/src/database/client/models/__tests__/message.test.ts +++ b/src/database/client/models/__tests__/message.test.ts @@ -264,7 +264,7 @@ describe('MessageModel', () => { it('should update a role and plugins', async () => { const createdMessage = await MessageModel.create(messageData); const updateData = { - role: 'function' as const, + role: 'tool' as const, plugin: { apiName: 'a', identifier: 'b', arguments: 'abc' }, }; diff --git a/src/database/client/schemas/message.ts b/src/database/client/schemas/message.ts index ba4d2cfe493b..feddb3960eb5 100644 --- a/src/database/client/schemas/message.ts +++ b/src/database/client/schemas/message.ts @@ -1,7 +1,6 @@ /* eslint-disable sort-keys-fix/sort-keys-fix */ import { z } from 'zod'; - const TranslateSchema = z.object({ from: z.string().optional(), to: z.string(), @@ -20,7 +19,7 @@ const ToolCallSchema = PluginSchema.extend({ }); export const DB_MessageSchema = z.object({ - role: z.enum(['user', 'system', 'assistant', 'function', 'tool']), + role: z.enum(['user', 'system', 'assistant', 'tool']), content: z.string(), files: z.array(z.string()).optional(), favorite: z.number().int().min(0).max(1).optional(), diff --git a/src/features/Conversation/Actions/index.ts 
b/src/features/Conversation/Actions/index.ts index dfa380401829..c18508dc5cea 100644 --- a/src/features/Conversation/Actions/index.ts +++ b/src/features/Conversation/Actions/index.ts @@ -8,13 +8,11 @@ import { LLMRoleType } from '@/types/llm'; import { OnActionsClick, RenderAction } from '../types'; import { AssistantActionsBar } from './Assistant'; import { DefaultActionsBar } from './Fallback'; -import { FunctionActionsBar } from './Function'; import { ToolActionsBar } from './Tool'; import { UserActionsBar } from './User'; export const renderActions: Record = { assistant: AssistantActionsBar, - function: FunctionActionsBar, system: DefaultActionsBar, tool: ToolActionsBar, user: UserActionsBar, diff --git a/src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json b/src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json new file mode 100644 index 000000000000..dc0fd7d910e3 --- /dev/null +++ b/src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json @@ -0,0 +1,245 @@ +{ + "exportType": "sessions", + "state": { + "sessions": [ + { + "config": { + "displayMode": "chat", + "historyCount": 1, + "model": "gpt-3.5-turbo", + "params": { + "frequency_penalty": 0, + "presence_penalty": 0, + "temperature": 0.6, + "top_p": 1, + "max_tokens": 2200 + }, + "plugins": [], + "systemRole": "你是一名 Postgresql 的数据库专家。用户是一名数据库小白,你需要使用简单直白的方式告诉用户如何使用 ostgresql 和它相应的 orm 工具 primsa", + "enableCompressThreshold": false, + "enableMaxTokens": false, + "inputTemplate": "" + }, + "id": "06cc3e20-e870-4099-a619-c07a849d742d", + "meta": { + "avatar": "🐘", + "backgroundColor": "rgba(0,0,0,0)", + "title": "Postgresql 指南", + "description": "PostgreSQL 数据库和 Prisma ORM 工具的简单使用指南", + "tags": ["数据库", "PostgreSQL", "教程", "ORM", "Prisma"] + }, + "type": "agent", + "createdAt": 1698463356269, + "group": "default", + "updatedAt": 1698463356269, + "pinned": true + } + ], + "messages": [ + { + "content": "是的,你可以使用 Prisma 来自定义生成 `id` 的逻辑,例如使用 `nanoid` 库来生成唯一标识符。\n\n首先,你需要在项目中安装 `nanoid` 库:\n\n```\nnpm install nanoid\n```\n\n然后,你可以在 Prisma 的模型定义中使用 `@default` 指令,并提供一个自定义的默认值函数来生成 `id`。\n\n```prisma\nmodel Message {\n id String @id @default(dbgenerated(\"nanoid(10)\"))\n\n createdAt DateTime @default(now())\n updatedAt DateTime @updatedAt\n}\n```\n\n在上述模型定义中,我们使用 `dbgenerated` 指令来定义一个自定义的默认值函数。函数 `nanoid(10)` 会生成一个长度为 10 的唯一标识符。\n\n请注意,使用自定义的 `id` 生成逻辑可能会对数据库性能产生一些影响,因为每次创建新记录时都需要执行自定义函数。因此,请确保你的自定义逻辑是高效的,并在实际使用中进行测试和评估性能。", + "id": "n2Tsl3Dm", + "meta": {}, + "parentId": "JSZaJOGj", + "role": "assistant", + "topicId": "CZg3IEdy", + "createdAt": 1698463981313, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698464004494, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "如何指定 自定义函数的引入位置?", + "id": "xPft8Msa", + "meta": {}, + "role": "user", + "topicId": "CZg3IEdy", + "createdAt": 1698464173224, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698464173226 + }, + { + "content": "在 Prisma 中,你可以通过在 `schema.prisma` 文件的顶部使用 `generator` 块来指定自定义函数的引入位置。\n\n例如,假设你想使用 `nanoid` 库来生成自定义的 `id`,你可以按照以下方式配置:\n\n```prisma\ngenerator client {\n provider = \"prisma-client-js\"\n output = \"./node_modules/@prisma/client\"\n}\n\ngenerator dbgenerated {\n provider = \"prisma-dbgenerator\"\n output = \"./node_modules/@prisma/client\"\n binaryTargets = [\"native\"]\n previewFeatures = [\"nativeTypes\"]\n config = {\n \"generator\": {\n \"dbgenerated\": {\n \"output\": \"./src/db/generated\",\n \"provider\": \"./src/db/generated/provider.js\"\n }\n }\n }\n}\n```\n\n在上述配置中,我们定义了两个生成器。`client` 
生成器用于生成 Prisma 客户端代码,而 `dbgenerated` 生成器用于生成数据库相关的代码。\n\n注意到 `dbgenerated` 生成器的 `config` 部分中有一个 `provider` 字段,它指定了自定义函数的引入位置。在这个示例中,我们将自定义函数放在 `./src/db/generated/provider.js` 文件中。\n\n你可以根据你的项目结构和需要来调整路径和文件名,确保指定的 `provider` 路径正确引入了自定义函数。", + "id": "jZc2KKRs", + "meta": {}, + "parentId": "xPft8Msa", + "role": "assistant", + "topicId": "CZg3IEdy", + "createdAt": 1698464173228, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698464181896, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "如何启动一个 postgres Docker 镜像?", + "id": "LjG9XSuq", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698754460196, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754463118 + }, + { + "content": "要启动一个 PostgreSQL Docker 镜像,你可以按照以下步骤操作:\n\n1. 首先,确保你已经安装了 Docker,可以在终端中运行 `docker --version` 来检查。\n\n2. 在终端中运行以下命令以下载并启动一个 PostgreSQL 镜像:\n\n ```shell\n docker run --name my-postgres -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -d postgres\n ```\n\n 这将从 Docker Hub 下载 PostgreSQL 镜像", + "id": "CL7E1Mc5", + "meta": {}, + "parentId": "LjG9XSuq", + "role": "assistant", + "topicId": "MNWhBEh5", + "createdAt": 1698754479751, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754507690, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "如何指定本地存储的目录到 /www/database/postgres", + "id": "UIvHZr7m", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698754591763, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754591764 + }, + { + "content": "要将本地存储的目录映射到容器内的 `/var/lib/postgresql/data` 目录,你可以使用 Docker 的 `-v` 参数。\n\n以下是示例命令:\n\n```shell\ndocker run --name my-postgres -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -v /www/database/postgres:/var/lib/postgresql/data -d postgres\n```\n\n在这个命令中,我们使用了 `-v` 参数来指定本地目录 `/www/database/postgres` 映射到容器内的 `/var/lib/postgresql/data` 目录。这样,所有的 PostgreSQL 数据将会保存在本地指定的目录中。", + "id": "XLJOK44r", + "meta": {}, + "parentId": "UIvHZr7m", + "role": "assistant", + "topicId": "MNWhBEh5", + "createdAt": 1698754591765, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754609275, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "docker run --name postgres -e POSTGRES_PASSWORD=DSLapHYdzNrai5j6 -p 33300:5432 -v /www/database/postgres:/var/lib/postgresql/data -d postgres", + "id": "W3J6jQjm", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698754700529, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754700530 + }, + { + "content": "如何启动 pgadmin", + "id": "L7mq0ehV", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698755749886, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698755749888 + }, + { + "content": "要启动 pgAdmin,你可以按照以下步骤操作:\n\n1. 首先,确保你已经安装了 Docker,并且已经成功启动了 PostgreSQL Docker 容器。\n\n2. 在终端中运行以下命令以启动 pgAdmin Docker 容器:\n\n ```shell\n docker run --name my-pgadmin -p 5050:80 -e PGADMIN_DEFAULT_EMAIL=myemail@example.com -e PGADMIN_DEFAULT_PASSWORD=mypassword -d dpage/pgadmin4\n ```\n\n 在这个命令中,我们使用了 `-p` 参数来将容器的端口 80 映射到本地的端口 5050。你可以根据需要修改这个端口映射。\n\n 我们还使用了 `-e` 参数来设置 pgAdmin 的默认登录邮箱和密码。你可以根据需要修改这些值。\n\n3. 在浏览器中访问 `http://localhost:5050`,你将看到 pgAdmin 的登录界面。\n\n4. 使用在上一步中设置的邮箱和密码进行登录。\n\n5. 
在 pgAdmin 中,你可以添加一个新的服务器连接,以连接到运行在 Docker 容器中的 PostgreSQL 数据库。要添加一个新的服务器连接,你需要提供以下信息:\n\n - 主机名/地址:使用 Docker 容器的 IP 地址或者 `localhost`。\n - 端口:使用 Docker 容器中 PostgreSQL 的映射端口,例如 33300。\n - 用户名和密码:使用在启动 PostgreSQL 容器时设置的用户名和密码。\n\n 完成上述信息的填写后,点击保存并连接到 PostgreSQL 服务器。\n\n现在,你已经成功启动了 pgAdmin 并连接到了你的 PostgreSQL 数据库。你可以使用 pgAdmin 来管理和操作你的数据库。", + "id": "d5XTX9EQ", + "meta": {}, + "parentId": "L7mq0ehV", + "role": "assistant", + "topicId": "MNWhBEh5", + "createdAt": 1698755749889, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698755786183, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "", + "id": "tool_calls_KPPDiRyW", + "meta": {}, + "parentId": "42k72jMi", + "role": "assistant", + "createdAt": 1690650544842, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1690650572389, + "fromModel": "gpt-3.5-turbo-16k", + "tools": [ + { + "apiName": "websiteCrawler", + "arguments": "{\n \"url\": \"https://mp.weixin.qq.com/s?__biz=MjM5MTA1MjAxMQ==&mid=2651264337&idx=1&sn=d7d9126578c74c912e1f0d42cb5629da&chksm=bd48ccd58a3f45c3f2cbc7d7b732c820b9e7cd6b547c06acc8170b233710b5fb5ed62f5fd94d&mpshare=1&scene=1&srcid=07294Mpw7C6JCLgtagL1cBDR&sharer_sharetime=1690622712877&sharer_shareid=0358058a42fc25387d28337fc3d22c3c#rd\"\n}", + "id": "tool_call_KPPDiRyW", + "identifier": "websiteCrawler", + "type": "default" + } + ] + }, + { + "content": "abcabc", + "id": "KPPDiRyW", + "meta": {}, + "parentId": "tool_calls_KPPDiRyW", + "role": "tool", + "createdAt": 1690650544852, + "plugin": { + "apiName": "websiteCrawler", + "arguments": "{\n \"url\": \"https://mp.weixin.qq.com/s?__biz=MjM5MTA1MjAxMQ==&mid=2651264337&idx=1&sn=d7d9126578c74c912e1f0d42cb5629da&chksm=bd48ccd58a3f45c3f2cbc7d7b732c820b9e7cd6b547c06acc8170b233710b5fb5ed62f5fd94d&mpshare=1&scene=1&srcid=07294Mpw7C6JCLgtagL1cBDR&sharer_sharetime=1690622712877&sharer_shareid=0358058a42fc25387d28337fc3d22c3c#rd\"\n}", + "identifier": "websiteCrawler", + "type": "default" + }, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1690650572399, + "fromModel": "gpt-3.5-turbo-16k", + "tool_call_id": "tool_call_KPPDiRyW" + }, + { + "content": "", + "id": "tool_calls_9cRjevRQ", + "meta": {}, + "parentId": "3nDXtEKv", + "role": "assistant", + "createdAt": 1700065743395, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1700065751851, + "fromModel": "gpt-3.5-turbo-16k", + "tools": [ + { + "apiName": "getWebsiteContent", + "arguments": "{\n \"url\": \"https://nodejs.org/api/packages.html#dual-package-hazard\"\n}", + "id": "tool_call_9cRjevRQ", + "identifier": "website-crawler", + "type": "default" + } + ] + }, + { + "content": "bbbbb", + "id": "9cRjevRQ", + "meta": {}, + "parentId": "tool_calls_9cRjevRQ", + "role": "tool", + "plugin": { + "apiName": "getWebsiteContent", + "arguments": "{\n \"url\": \"https://nodejs.org/api/packages.html#dual-package-hazard\"\n}", + "identifier": "website-crawler", + "type": "default" + }, + "createdAt": 1700065743405, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1700065751861, + "fromModel": "gpt-3.5-turbo-16k", + "tool_call_id": "tool_call_9cRjevRQ" + } + ], + "topics": [ + { + "id": "CZg3IEdy", + "title": "Prisma中的cuid函数作用是什么", + "createdAt": 1698463911747, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698463915716 + }, + { + "id": "MNWhBEh5", + "title": "启动 PostgreSQL Docker 镜像方法", + "createdAt": 1698754463117, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754464867 + } + ], + 
"sessionGroups": [] + }, + "version": 5 +} diff --git a/src/migrations/FromV4ToV5/fixtures/function-input-v4.json b/src/migrations/FromV4ToV5/fixtures/function-input-v4.json new file mode 100644 index 000000000000..f83e41855a29 --- /dev/null +++ b/src/migrations/FromV4ToV5/fixtures/function-input-v4.json @@ -0,0 +1,96 @@ +{ + "exportType": "sessions", + "state": { + "messages": [ + { + "role": "function", + "content": "[{\"city\":\"杭州市\",\"adcode\":\"330100\",\"province\":\"浙江\",\"reporttime\":\"2024-05-11 10:32:32\",\"casts\":[{\"date\":\"2024-05-11\",\"week\":\"6\",\"dayweather\":\"中雨\",\"nightweather\":\"中雨\",\"daytemp\":\"25\",\"nighttemp\":\"18\",\"daywind\":\"南\",\"nightwind\":\"南\",\"daypower\":\"4\",\"nightpower\":\"4\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"18.0\"},{\"date\":\"2024-05-12\",\"week\":\"7\",\"dayweather\":\"多云\",\"nightweather\":\"晴\",\"daytemp\":\"25\",\"nighttemp\":\"16\",\"daywind\":\"北\",\"nightwind\":\"北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-13\",\"week\":\"1\",\"dayweather\":\"晴\",\"nightweather\":\"晴\",\"daytemp\":\"28\",\"nighttemp\":\"16\",\"daywind\":\"东北\",\"nightwind\":\"东北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-14\",\"week\":\"2\",\"dayweather\":\"多云\",\"nightweather\":\"阴\",\"daytemp\":\"28\",\"nighttemp\":\"15\",\"daywind\":\"东\",\"nightwind\":\"东\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"15.0\"}]}]", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "WwRdrdCz", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395810026, + "id": "UQ5VAGkk", + "updatedAt": 1715395822026, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "904f7ab0-4066-4f9b-84fa-b1677974f976", + "plugin": { + "apiName": "fetchCurrentWeather", + "arguments": "{\n \"city\": \"杭州\"\n}", + "identifier": "realtime-weather", + "type": "default" + } + }, + { + "role": "user", + "content": "杭州天气如何?\n\n", + "files": [], + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715394828164, + "id": "WwRdrdCz", + "updatedAt": 1715394828164 + }, + { + "role": "assistant", + "content": "杭州的天气情况如下:\n\n- 今天(5月11日)白天和晚上都有中雨,白天温度为25摄氏度,晚上温度为18摄氏度,南风,风力为4级。\n- 明天(5月12日)白天多云,晚上晴,白天温度为25摄氏度,晚上温度为16摄氏度,北风,风力为1-3级。\n- 后天(5月13日)白天和晚上都是晴天,白天温度为28摄氏度,晚上温度为16摄氏度,东北风,风力为1-3级。\n- 大后天(5月14日)白天多云,晚上阴天,白天温度为28摄氏度,晚上温度为15摄氏度,东风,风力为1-3级。\n\n请注意根据实际情况做好防雨和保暖措施。", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "UQ5VAGkk", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395822125, + "id": "hbqsYoMn", + "updatedAt": 1715395830606, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "cad6a5bd-6a7a-4ff9-b37a-3deaa23b094a" + } + ], + "sessions": [ + { + "config": { + "autoCreateTopicThreshold": 2, + "displayMode": "chat", + "enableAutoCreateTopic": true, + "historyCount": 1, + "model": "gpt-3.5-turbo", + "params": { + "frequency_penalty": 0, + "presence_penalty": 0, + "temperature": 0.6, + "top_p": 1 + }, + "plugins": ["realtime-weather", "steam"], + "provider": "openai", + "systemRole": "", + "tts": { + "showAllLocaleVoice": false, + "sttLocale": "auto", + "ttsService": "openai", + "voice": { "openai": "alloy" } + } + }, + "group": "default", + "meta": {}, + "pinned": 
false, + "type": "agent", + "createdAt": 1715394818440, + "id": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "updatedAt": 1715394822366, + "model": "gpt-3.5-turbo" + } + ], + "topics": [ + { + "title": "杭州天气查询", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "createdAt": 1715394835099, + "id": "yjkdZafZ", + "updatedAt": 1715394838061, + "favorite": false + } + ] + }, + "version": 4 +} diff --git a/src/migrations/FromV4ToV5/fixtures/function-output-v5.json b/src/migrations/FromV4ToV5/fixtures/function-output-v5.json new file mode 100644 index 000000000000..439747b37f6b --- /dev/null +++ b/src/migrations/FromV4ToV5/fixtures/function-output-v5.json @@ -0,0 +1,120 @@ +{ + "exportType": "sessions", + "state": { + "messages": [ + { + "role": "assistant", + "content": "", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "WwRdrdCz", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395810016, + "id": "tool_calls_UQ5VAGkk", + "updatedAt": 1715395822016, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "904f7ab0-4066-4f9b-84fa-b1677974f976", + "tools": [ + { + "id": "tool_call_UQ5VAGkk", + "apiName": "fetchCurrentWeather", + "arguments": "{\n \"city\": \"杭州\"\n}", + "identifier": "realtime-weather", + "type": "default" + } + ] + }, + { + "role": "tool", + "content": "[{\"city\":\"杭州市\",\"adcode\":\"330100\",\"province\":\"浙江\",\"reporttime\":\"2024-05-11 10:32:32\",\"casts\":[{\"date\":\"2024-05-11\",\"week\":\"6\",\"dayweather\":\"中雨\",\"nightweather\":\"中雨\",\"daytemp\":\"25\",\"nighttemp\":\"18\",\"daywind\":\"南\",\"nightwind\":\"南\",\"daypower\":\"4\",\"nightpower\":\"4\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"18.0\"},{\"date\":\"2024-05-12\",\"week\":\"7\",\"dayweather\":\"多云\",\"nightweather\":\"晴\",\"daytemp\":\"25\",\"nighttemp\":\"16\",\"daywind\":\"北\",\"nightwind\":\"北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-13\",\"week\":\"1\",\"dayweather\":\"晴\",\"nightweather\":\"晴\",\"daytemp\":\"28\",\"nighttemp\":\"16\",\"daywind\":\"东北\",\"nightwind\":\"东北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-14\",\"week\":\"2\",\"dayweather\":\"多云\",\"nightweather\":\"阴\",\"daytemp\":\"28\",\"nighttemp\":\"15\",\"daywind\":\"东\",\"nightwind\":\"东\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"15.0\"}]}]", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "tool_calls_UQ5VAGkk", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395810026, + "id": "UQ5VAGkk", + "updatedAt": 1715395822026, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "904f7ab0-4066-4f9b-84fa-b1677974f976", + "tool_call_id": "tool_call_UQ5VAGkk", + "plugin": { + "apiName": "fetchCurrentWeather", + "arguments": "{\n \"city\": \"杭州\"\n}", + "identifier": "realtime-weather", + "type": "default" + } + }, + { + "role": "user", + "content": "杭州天气如何?\n\n", + "files": [], + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715394828164, + "id": "WwRdrdCz", + "updatedAt": 1715394828164 + }, + { + "role": "assistant", + "content": "杭州的天气情况如下:\n\n- 今天(5月11日)白天和晚上都有中雨,白天温度为25摄氏度,晚上温度为18摄氏度,南风,风力为4级。\n- 明天(5月12日)白天多云,晚上晴,白天温度为25摄氏度,晚上温度为16摄氏度,北风,风力为1-3级。\n- 
后天(5月13日)白天和晚上都是晴天,白天温度为28摄氏度,晚上温度为16摄氏度,东北风,风力为1-3级。\n- 大后天(5月14日)白天多云,晚上阴天,白天温度为28摄氏度,晚上温度为15摄氏度,东风,风力为1-3级。\n\n请注意根据实际情况做好防雨和保暖措施。", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "UQ5VAGkk", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395822125, + "id": "hbqsYoMn", + "updatedAt": 1715395830606, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "cad6a5bd-6a7a-4ff9-b37a-3deaa23b094a" + } + ], + "sessions": [ + { + "config": { + "autoCreateTopicThreshold": 2, + "displayMode": "chat", + "enableAutoCreateTopic": true, + "historyCount": 1, + "model": "gpt-3.5-turbo", + "params": { + "frequency_penalty": 0, + "presence_penalty": 0, + "temperature": 0.6, + "top_p": 1 + }, + "plugins": ["realtime-weather", "steam"], + "provider": "openai", + "systemRole": "", + "tts": { + "showAllLocaleVoice": false, + "sttLocale": "auto", + "ttsService": "openai", + "voice": { "openai": "alloy" } + } + }, + "group": "default", + "meta": {}, + "pinned": false, + "type": "agent", + "createdAt": 1715394818440, + "id": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "updatedAt": 1715394822366, + "model": "gpt-3.5-turbo" + } + ], + "topics": [ + { + "title": "杭州天气查询", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "createdAt": 1715394835099, + "id": "yjkdZafZ", + "updatedAt": 1715394838061, + "favorite": false + } + ] + }, + "version": 5 +} diff --git a/src/migrations/FromV4ToV5/index.ts b/src/migrations/FromV4ToV5/index.ts new file mode 100644 index 000000000000..366188536a2e --- /dev/null +++ b/src/migrations/FromV4ToV5/index.ts @@ -0,0 +1,58 @@ +import type { Migration, MigrationData } from '@/migrations/VersionController'; + +import { V4ConfigState, V4Message } from './types/v4'; +import { V5ConfigState, V5Message } from './types/v5'; + +export class MigrationV4ToV5 implements Migration { + // from this version to start migration + version = 4; + + migrate(data: MigrationData): MigrationData { + const { messages } = data.state; + + return { + ...data, + state: { + ...data.state, + messages: MigrationV4ToV5.migrateMessage(messages), + }, + }; + } + + static migrateMessage(messages: V4Message[]): V5Message[] { + let v5Messages: V5Message[] = []; + + messages.forEach((item) => { + if (item.role === 'function') { + const toolCallId = `tool_call_${item.id}`; + const assistantMessageId = `tool_calls_${item.id}`; + const assistantMessage: V5Message = { + ...item, + content: '', + // make sure the createdAt is before than tool message + createdAt: item.createdAt - 10, + id: assistantMessageId, + plugin: undefined, + role: 'assistant', + tools: [{ ...item.plugin!, id: toolCallId }], + updatedAt: item.updatedAt - 10, + }; + + const toolMessage: V5Message = { + ...item, + parentId: assistantMessageId, + role: 'tool', + tool_call_id: toolCallId, + }; + v5Messages.push(assistantMessage, toolMessage); + } + + // if not function message, just push it + else { + v5Messages.push(item as V5Message); + } + }); + + return v5Messages; + } +} diff --git a/src/migrations/FromV4ToV5/migrations.test.ts b/src/migrations/FromV4ToV5/migrations.test.ts new file mode 100644 index 000000000000..9ce33d5fc50e --- /dev/null +++ b/src/migrations/FromV4ToV5/migrations.test.ts @@ -0,0 +1,49 @@ +import { describe } from 'vitest'; + +import { MigrationData, VersionController } from '@/migrations/VersionController'; + +import { MigrationV1ToV2 } from '../FromV1ToV2'; +import inputV1Data from '../FromV1ToV2/fixtures/input-v1-session.json'; 
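Note on `MigrationV4ToV5.migrateMessage` above: every v4 `role=function` message is split into a synthetic assistant message that owns the tool call (its id prefixed with `tool_calls_`) plus a `role=tool` result message pointing back via `parentId` and `tool_call_id`. A minimal sketch of the transformation on a hypothetical v4 record (ids and timestamp offsets follow the conventions of the implementation above):

```ts
import { MigrationV4ToV5 } from '@/migrations/FromV4ToV5';

// hypothetical v4-era record: a plugin result stored as a single `function` message
const v4Messages = [
  {
    content: '{"weather":"rain"}',
    createdAt: 1715395810026,
    id: 'UQ5VAGkk',
    plugin: {
      apiName: 'fetchCurrentWeather',
      arguments: '{"city":"杭州"}',
      identifier: 'realtime-weather',
      type: 'default' as const,
    },
    role: 'function' as const,
    updatedAt: 1715395822026,
  },
];

const v5Messages = MigrationV4ToV5.migrateMessage(v4Messages);
// => [
//   { id: 'tool_calls_UQ5VAGkk', role: 'assistant', content: '',
//     tools: [{ apiName: 'fetchCurrentWeather', arguments: '{"city":"杭州"}',
//               identifier: 'realtime-weather', type: 'default', id: 'tool_call_UQ5VAGkk' }],
//     createdAt: 1715395810016, updatedAt: 1715395822016 },
//   { id: 'UQ5VAGkk', role: 'tool', parentId: 'tool_calls_UQ5VAGkk',
//     tool_call_id: 'tool_call_UQ5VAGkk', content: '{"weather":"rain"}',
//     createdAt: 1715395810026, updatedAt: 1715395822026 },
// ]
```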
+import { MigrationV2ToV3 } from '../FromV2ToV3'; +import { MigrationV3ToV4 } from '../FromV3ToV4'; +import outputDataFromV1ToV5 from './fixtures/from-v1-to-v5-output.json'; +import functionInputV4 from './fixtures/function-input-v4.json'; +import functionOutputV5 from './fixtures/function-output-v5.json'; +import { MigrationV4ToV5 } from './index'; + +describe('MigrationV4ToV5', () => { + let migrations; + let versionController: VersionController; + + beforeEach(() => { + migrations = [MigrationV4ToV5]; + versionController = new VersionController(migrations, 5); + }); + + describe('should migrate data correctly from previous versions', () => { + it('role=function', () => { + const data: MigrationData = functionInputV4; + + const migratedData = versionController.migrate(data); + + expect(migratedData.version).toEqual(functionOutputV5.version); + expect(migratedData.state.messages).toEqual(functionOutputV5.state.messages); + }); + }); + + it('should work correct from v1 to v5', () => { + const data: MigrationData = inputV1Data; + + versionController = new VersionController( + [MigrationV4ToV5, MigrationV3ToV4, MigrationV2ToV3, MigrationV1ToV2], + 5, + ); + + const migratedData = versionController.migrate(data); + + expect(migratedData.version).toEqual(outputDataFromV1ToV5.version); + expect(migratedData.state.messages).toEqual(outputDataFromV1ToV5.state.messages); + expect(migratedData.state.sessions).toEqual(outputDataFromV1ToV5.state.sessions); + expect(migratedData.state.topics).toEqual(outputDataFromV1ToV5.state.topics); + }); +}); diff --git a/src/migrations/FromV4ToV5/types/v4.ts b/src/migrations/FromV4ToV5/types/v4.ts new file mode 100644 index 000000000000..ee51440a916c --- /dev/null +++ b/src/migrations/FromV4ToV5/types/v4.ts @@ -0,0 +1,21 @@ +import { LobeToolRenderType } from '@/types/tool'; + +export interface V4ChatPluginPayload { + apiName: string; + arguments: string; + identifier: string; + type: LobeToolRenderType; +} + +export interface V4Message { + content: string; + createdAt: number; + id: string; + plugin?: V4ChatPluginPayload; + role: 'user' | 'system' | 'assistant' | 'function'; + updatedAt: number; +} + +export interface V4ConfigState { + messages: V4Message[]; +} diff --git a/src/migrations/FromV4ToV5/types/v5.ts b/src/migrations/FromV4ToV5/types/v5.ts new file mode 100644 index 000000000000..4241b3b2e886 --- /dev/null +++ b/src/migrations/FromV4ToV5/types/v5.ts @@ -0,0 +1,27 @@ +import { LobeToolRenderType } from '@/types/tool'; + +import { V4ChatPluginPayload } from './v4'; + +interface ChatToolPayload { + apiName: string; + arguments: string; + id: string; + identifier: string; + type: LobeToolRenderType; +} + +export interface V5Message { + content: string; + createdAt: number; + id: string; + parentId?: string; + plugin?: V4ChatPluginPayload; + role: 'user' | 'system' | 'assistant' | 'tool'; + tool_call_id?: string; + tools?: ChatToolPayload[]; + updatedAt: number; +} + +export interface V5ConfigState { + messages: V5Message[]; +} diff --git a/src/migrations/index.ts b/src/migrations/index.ts index a7c9304bbe8b..b91c22897e29 100644 --- a/src/migrations/index.ts +++ b/src/migrations/index.ts @@ -5,12 +5,19 @@ import { ConfigStateAll } from '@/types/exportConfig'; import { MigrationV0ToV1 } from './FromV0ToV1'; import { MigrationV1ToV2 } from './FromV1ToV2'; import { MigrationV3ToV4 } from './FromV3ToV4'; +import { MigrationV4ToV5 } from './FromV4ToV5'; // Current latest version -export const CURRENT_CONFIG_VERSION = 4; +export const CURRENT_CONFIG_VERSION = 5; 
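Note on versioning: `CURRENT_CONFIG_VERSION` stamps every export, and `VersionController` (exercised in `migrations.test.ts` above) upgrades older data step by step, so a v1 export passes through V1→V2→V3→V4→V5 in a single `migrate` call. A usage sketch mirroring that test setup (`oldExport` is a placeholder for previously exported data):

```ts
import type { MigrationData } from '@/migrations/VersionController';
import { VersionController } from '@/migrations/VersionController';
import { MigrationV1ToV2 } from '@/migrations/FromV1ToV2';
import { MigrationV2ToV3 } from '@/migrations/FromV2ToV3';
import { MigrationV3ToV4 } from '@/migrations/FromV3ToV4';
import { MigrationV4ToV5 } from '@/migrations/FromV4ToV5';

declare const oldExport: MigrationData; // e.g. a version-1 sessions export

// the test above registers migrations newest-first, so ordering is evidently
// resolved by the controller from each migration's `version` field
const controller = new VersionController(
  [MigrationV4ToV5, MigrationV3ToV4, MigrationV2ToV3, MigrationV1ToV2],
  5, // target version, kept in sync with CURRENT_CONFIG_VERSION
);

const migrated = controller.migrate(oldExport); // => { ..., version: 5 }
```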
// Version migrations module const ConfigMigrations = [ + /** + * 2024.05.11 + * + * role=function to role=tool + */ + MigrationV4ToV5, /** * 2024.04.09 * settings migrate the `languageModel` diff --git a/src/services/chat.ts b/src/services/chat.ts index 9db2af302bdf..316a39dd1b8a 100644 --- a/src/services/chat.ts +++ b/src/services/chat.ts @@ -427,18 +427,12 @@ class ChatService { name: genToolCallingName(tool.identifier, tool.apiName, tool.type), }, id: tool.id, - type: tool.type, + type: 'function', }), ), }; } - // TODO: need to be removed after upgrade - case 'function': { - const name = m.plugin?.identifier as string; - return { content: m.content, name, role: m.role }; - } - case 'tool': { return { content: m.content, diff --git a/src/store/chat/slices/message/action.ts b/src/store/chat/slices/message/action.ts index 1bae1ab27fef..54c5719557f5 100644 --- a/src/store/chat/slices/message/action.ts +++ b/src/store/chat/slices/message/action.ts @@ -564,7 +564,7 @@ export const chatMessage: StateCreator< let contextMessages: ChatMessage[] = []; switch (currentMessage.role) { - case 'function': + case 'tool': case 'user': { contextMessages = chats.slice(0, currentIndex + 1); break; diff --git a/src/store/chat/slices/share/action.test.ts b/src/store/chat/slices/share/action.test.ts index 6e1caa6bccf0..71b6617b01de 100644 --- a/src/store/chat/slices/share/action.test.ts +++ b/src/store/chat/slices/share/action.test.ts @@ -85,7 +85,7 @@ describe('shareSlice actions', () => { it('should include plugin information when withPluginInfo is true', async () => { // 模拟带有插件信息的消息 const pluginMessage = { - role: 'function', + role: 'tool', content: 'plugin content', plugin: { type: 'default', @@ -118,7 +118,7 @@ describe('shareSlice actions', () => { it('should not include plugin information when withPluginInfo is false', async () => { const pluginMessage = { - role: 'function', + role: 'tool', content: 'plugin content', plugin: { type: 'default', diff --git a/src/store/chat/slices/share/action.ts b/src/store/chat/slices/share/action.ts index 5922bbaf20a6..2bb03d928aac 100644 --- a/src/store/chat/slices/share/action.ts +++ b/src/store/chat/slices/share/action.ts @@ -81,7 +81,7 @@ export const chatShare: StateCreator; files?: string[]; - /** - * only used in tool calling - */ - name?: string; /** * observation id */ @@ -66,7 +63,7 @@ export interface ChatMessage extends BaseDataModel { /** * message role type */ - role: LLMRoleType; + role: MessageRoleType; sessionId?: string; tool_call_id?: string; From 59402d00e13634284e73364cc215adcf48df009e Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 05:55:16 +0000 Subject: [PATCH 17/24] =?UTF-8?q?=F0=9F=97=83=EF=B8=8F=20refactor:=20migra?= =?UTF-8?q?te=20database?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/database/client/core/db.ts | 32 +++++++++++++++++++++++++++++ src/database/client/core/schemas.ts | 9 ++++++++ 2 files changed, 41 insertions(+) diff --git a/src/database/client/core/db.ts b/src/database/client/core/db.ts index 9c16d69f00dc..7de75d88eb08 100644 --- a/src/database/client/core/db.ts +++ b/src/database/client/core/db.ts @@ -19,6 +19,7 @@ import { dbSchemaV5, dbSchemaV6, dbSchemaV7, + dbSchemaV9, } from './schemas'; import { DBModel, LOBE_CHAT_LOCAL_DB_NAME } from './types/db'; @@ -67,6 +68,10 @@ export class BrowserDB extends Dexie { .stores(dbSchemaV7) .upgrade((trans) => this.upgradeToV8(trans)); + this.version(9) + .stores(dbSchemaV9) + .upgrade((trans) => 
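// (Context for the chained call that continues below: Dexie runs this `upgrade`
// callback once, inside the version-change transaction, when an existing client
// database below version 9 is opened. `upgradeToV9` is the IndexedDB counterpart
// of the JSON-export migration in `MigrationV4ToV5` above: each legacy
// `role=function` row is rewritten as a `role=tool` row plus a synthetic
// assistant row.)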
this.upgradeToV9(trans)); + + this.files = this.table('files'); this.sessions = this.table('sessions'); this.messages = this.table('messages'); @@ -153,6 +158,35 @@ export class BrowserDB extends Dexie { } }); }; + + upgradeToV9 = async (trans: Transaction) => { + const messages = trans.table('messages'); + await messages.toCollection().modify(async (message: DBModel<DB_Message>) => { + if ((message.role as string) === 'function') { + const origin = Object.assign({}, message); + + const toolCallId = `tool_call_${message.id}`; + const assistantMessageId = `tool_calls_${message.id}`; + + // rewrite the legacy function message in place as the tool result + message.role = 'tool'; + message.tool_call_id = toolCallId; + message.parentId = assistantMessageId; + + // add a synthetic assistant message that carries the tool call + await messages.add({ + ...origin, + content: '', + createdAt: message.createdAt - 10, + error: undefined, + id: assistantMessageId, + role: 'assistant', + tools: [{ ...message.plugin!, id: toolCallId }], + updatedAt: message.updatedAt - 10, + } as DBModel<DB_Message>); + } + }); + }; } export const browserDB = new BrowserDB(); diff --git a/src/database/client/core/schemas.ts b/src/database/client/core/schemas.ts index f07a699c96c4..ae066de791d7 100644 --- a/src/database/client/core/schemas.ts +++ b/src/database/client/core/schemas.ts @@ -76,3 +76,12 @@ export const dbSchemaV7 = { plugins: '&identifier, id, type, manifest.type, manifest.meta.title, manifest.meta.description, manifest.meta.author, createdAt, updatedAt', }; +// ************************************** // +// ******* Version 9 - 2024-05-11 ******* // +// ************************************** // +// - Added `tool_call_id` index to the `messages` table (migrate `role=function` to `role=tool`) +export const dbSchemaV9 = { + ...dbSchemaV7, + messages: + '&id, role, content, fromModel, favorite, tool_call_id, plugin.identifier, plugin.apiName, translate.content, createdAt, updatedAt, sessionId, topicId, quotaId, parentId, [sessionId+topicId], traceId', +}; From 41e9ec408ea91a5e0734ea69d40a42ef32e253b6 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 06:41:24 +0000 Subject: [PATCH 18/24] =?UTF-8?q?=E2=9C=85=20test:=20fix=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../client/models/__tests__/message.test.ts | 2 +- src/services/__tests__/chat.test.ts | 28 ++++++------------- src/store/chat/slices/share/action.test.ts | 2 +- src/utils/toolCall.ts | 2 +- 4 files changed, 12 insertions(+), 22 deletions(-) diff --git a/src/database/client/models/__tests__/message.test.ts b/src/database/client/models/__tests__/message.test.ts index 26687e7c6b0a..23f7819a6b15 100644 --- a/src/database/client/models/__tests__/message.test.ts +++ b/src/database/client/models/__tests__/message.test.ts @@ -271,7 +271,7 @@ describe('MessageModel', () => { await MessageModel.update(createdMessage.id, updateData); const updatedMessage = await MessageModel.findById(createdMessage.id); - expect(updatedMessage).toHaveProperty('role', 'function'); + expect(updatedMessage).toHaveProperty('role', 'tool'); }); }); diff --git a/src/services/__tests__/chat.test.ts b/src/services/__tests__/chat.test.ts index b9f403533e2b..376a89ab9859 100644 --- a/src/services/__tests__/chat.test.ts +++ b/src/services/__tests__/chat.test.ts @@ -126,7 +126,7 @@ describe('ChatService', () => { it('should include image content when with vision model', async () => { const messages = [ { content: 'Hello', role: 'user', files: ['file1'] }, // Message with files - { content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role + { content: 'Hi', role: 'tool', plugin: { identifier: 'plugin1', 
apiName: 'api1' } }, // Message with tool role { content: 'Hey', role: 'assistant' }, // Regular user message ] as ChatMessage[]; @@ -166,8 +166,8 @@ describe('ChatService', () => { }, { content: 'Hi', - name: 'plugin1', - role: 'function', + name: 'plugin1____api1', + role: 'tool', }, { content: 'Hey', @@ -183,7 +183,7 @@ describe('ChatService', () => { it('should not include image content when default model', async () => { const messages = [ { content: 'Hello', role: 'user', files: ['file1'] }, // Message with files - { content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role + { content: 'Hi', role: 'tool', plugin: { identifier: 'plugin1', apiName: 'api1' } }, // Message with function role { content: 'Hey', role: 'assistant' }, // Regular user message ] as ChatMessage[]; @@ -212,7 +212,7 @@ describe('ChatService', () => { { messages: [ { content: 'Hello', role: 'user' }, - { content: 'Hi', name: 'plugin1', role: 'function' }, + { content: 'Hi', name: 'plugin1____api1', role: 'tool' }, { content: 'Hey', role: 'assistant' }, ], model: 'gpt-3.5-turbo', @@ -224,7 +224,7 @@ describe('ChatService', () => { it('should not include image with vision models when can not find the image', async () => { const messages = [ { content: 'Hello', role: 'user', files: ['file2'] }, // Message with files - { content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role + { content: 'Hi', role: 'tool', plugin: { identifier: 'plugin1', apiName: 'api1' } }, // Message with function role { content: 'Hey', role: 'assistant' }, // Regular user message ] as ChatMessage[]; @@ -248,19 +248,9 @@ describe('ChatService', () => { expect(getChatCompletionSpy).toHaveBeenCalledWith( { messages: [ - { - content: 'Hello', - role: 'user', - }, - { - content: 'Hi', - name: 'plugin1', - role: 'function', - }, - { - content: 'Hey', - role: 'assistant', - }, + { content: 'Hello', role: 'user' }, + { content: 'Hi', name: 'plugin1____api1', role: 'tool' }, + { content: 'Hey', role: 'assistant' }, ], }, undefined, diff --git a/src/store/chat/slices/share/action.test.ts b/src/store/chat/slices/share/action.test.ts index 71b6617b01de..f40ae56181e9 100644 --- a/src/store/chat/slices/share/action.test.ts +++ b/src/store/chat/slices/share/action.test.ts @@ -154,7 +154,7 @@ describe('shareSlice actions', () => { { role: 'user', content: 'user message', id: '1' }, { role: 'assistant', content: 'assistant message', id: '2' }, { - role: 'function', + role: 'tool', content: 'plugin content', plugin: { type: 'default', diff --git a/src/utils/toolCall.ts b/src/utils/toolCall.ts index 59046f697992..0f7737404b34 100644 --- a/src/utils/toolCall.ts +++ b/src/utils/toolCall.ts @@ -2,7 +2,7 @@ import { Md5 } from 'ts-md5'; import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; -export const genToolCallingName = (identifier: string, name: string, type?: string) => { +export const genToolCallingName = (identifier: string, name: string, type: string = 'default') => { const pluginType = type && type !== 'default' ? 
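// (Naming scheme: the generated tool-calling name joins the plugin identifier and
// apiName with PLUGIN_SCHEMA_SEPARATOR, which the updated chat.test.ts expectations
// above show as e.g. genToolCallingName('plugin1', 'api1') === 'plugin1____api1';
// the expression continuing below appends a type suffix only for non-default tool
// types, and the new `type = 'default'` parameter default keeps that check simple.)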
`${PLUGIN_SCHEMA_SEPARATOR + type}` : ''; // 将插件的 identifier 作为前缀,避免重复 From e7ae1006de2761a663291d9b178a958bdc7d0931 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 07:36:18 +0000 Subject: [PATCH 19/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20createCallbacksTransformer=20to=20fix=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/types/chat.ts | 18 +++++-- .../agent-runtime/utils/streams/anthropic.ts | 3 +- .../utils/streams/bedrock/claude.ts | 8 +-- .../utils/streams/bedrock/llama.ts | 13 +++-- .../agent-runtime/utils/streams/google-ai.ts | 3 +- .../agent-runtime/utils/streams/minimax.ts | 3 +- .../agent-runtime/utils/streams/ollama.ts | 18 ++++--- .../agent-runtime/utils/streams/openai.ts | 3 +- .../agent-runtime/utils/streams/protocol.ts | 51 +++++++++++++++++++ 9 files changed, 96 insertions(+), 24 deletions(-) diff --git a/src/libs/agent-runtime/types/chat.ts b/src/libs/agent-runtime/types/chat.ts index 93aebf994997..dabe06a62461 100644 --- a/src/libs/agent-runtime/types/chat.ts +++ b/src/libs/agent-runtime/types/chat.ts @@ -1,5 +1,3 @@ -import { OpenAIStreamCallbacks } from 'ai'; - import { MessageToolCall } from '@/types/message'; export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function' | 'tool'; @@ -131,4 +129,18 @@ export interface ChatCompletionTool { type: 'function'; } -export type ChatStreamCallbacks = OpenAIStreamCallbacks; +export interface ChatStreamCallbacks { + /** + * `onCompletion`: Called when a completion finishes, with the full completion text. + */ + onCompletion?: (completion: string) => Promise<void> | void; + /** `onFinal`: Called once when the stream is closed with the final completion message. */ + onFinal?: (completion: string) => Promise<void> | void; + /** `onStart`: Called once when the stream is initialized. */ + onStart?: () => Promise<void> | void; + /** `onText`: Called for each text chunk. */ + onText?: (text: string) => Promise<void> | void; + /** `onToken`: Called for each tokenized message. 
*/ + onToken?: (token: string) => Promise<void> | void; + onToolCall?: () => Promise<void> | void; +} diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index d60f3362470e..e17c0ffa42e3 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -1,6 +1,6 @@ import Anthropic from '@anthropic-ai/sdk'; import type { Stream } from '@anthropic-ai/sdk/streaming'; -import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import { ChatStreamCallbacks } from '../../types'; import { @@ -8,6 +8,7 @@ import { StreamProtocolToolCallChunk, StreamStack, StreamToolCallChunkData, + createCallbacksTransformer, createSSEProtocolTransformer, } from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/claude.ts b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts index bfd2945b8329..ac74a985c813 100644 --- a/src/libs/agent-runtime/utils/streams/bedrock/claude.ts +++ b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts @@ -1,15 +1,15 @@ import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; -import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; import { nanoid } from '@/utils/uuid'; +import { ChatStreamCallbacks } from '../../../types'; import { transformAnthropicStream } from '../anthropic'; -import { StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { StreamStack, createCallbacksTransformer, createSSEProtocolTransformer } from '../protocol'; import { createBedrockStream } from './common'; export const AWSBedrockClaudeStream = ( res: InvokeModelWithResponseStreamResponse | ReadableStream, - cb?: AIStreamCallbacksAndOptions, + cb?: ChatStreamCallbacks, ): ReadableStream => { const streamStack: StreamStack = { id: 'chat_' + nanoid() }; @@ -17,5 +17,5 @@ export const AWSBedrockClaudeStream = ( return stream .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack)) - .pipeThrough(createCallbacksTransformer(cb) as any); + .pipeThrough(createCallbacksTransformer(cb)); }; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/llama.ts b/src/libs/agent-runtime/utils/streams/bedrock/llama.ts index adec93f736a5..78e0f5b327d5 100644 --- a/src/libs/agent-runtime/utils/streams/bedrock/llama.ts +++ b/src/libs/agent-runtime/utils/streams/bedrock/llama.ts @@ -1,9 +1,14 @@ import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; -import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; import { nanoid } from '@/utils/uuid'; -import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { ChatStreamCallbacks } from '../../../types'; +import { + StreamProtocolChunk, + StreamStack, + createCallbacksTransformer, + createSSEProtocolTransformer, +} from '../protocol'; import { createBedrockStream } from './common'; interface AmazonBedrockInvocationMetrics { @@ -34,7 +39,7 @@ export const transformLlamaStream = ( export const AWSBedrockLlamaStream = ( res: InvokeModelWithResponseStreamResponse | ReadableStream, - cb?: AIStreamCallbacksAndOptions, + cb?: ChatStreamCallbacks, ): ReadableStream => { const streamStack: StreamStack = { id: 'chat_' + nanoid() }; @@ -42,5 +47,5 @@ export const AWSBedrockLlamaStream = ( return stream .pipeThrough(createSSEProtocolTransformer(transformLlamaStream, streamStack)) - 
.pipeThrough(createCallbacksTransformer(cb) as any); + .pipeThrough(createCallbacksTransformer(cb)); }; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts index 92a6bfd10c8d..ff457c52be73 100644 --- a/src/libs/agent-runtime/utils/streams/google-ai.ts +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -2,7 +2,7 @@ import { EnhancedGenerateContentResponse, GenerateContentStreamResult, } from '@google/generative-ai'; -import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import { nanoid } from '@/utils/uuid'; @@ -12,6 +12,7 @@ import { StreamStack, StreamToolCallChunkData, chatStreamable, + createCallbacksTransformer, createSSEProtocolTransformer, generateToolCallId, } from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts index a517b9f794e4..05629bafdc01 100644 --- a/src/libs/agent-runtime/utils/streams/minimax.ts +++ b/src/libs/agent-runtime/utils/streams/minimax.ts @@ -1,9 +1,8 @@ -import { createCallbacksTransformer } from 'ai'; import OpenAI from 'openai'; import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './openai'; -import { createSSEProtocolTransformer } from './protocol'; +import { createCallbacksTransformer, createSSEProtocolTransformer } from './protocol'; const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => { const decoder = new TextDecoder(); diff --git a/src/libs/agent-runtime/utils/streams/ollama.ts b/src/libs/agent-runtime/utils/streams/ollama.ts index 728b78dedff2..32d4c5197d8b 100644 --- a/src/libs/agent-runtime/utils/streams/ollama.ts +++ b/src/libs/agent-runtime/utils/streams/ollama.ts @@ -1,13 +1,15 @@ -import { - type AIStreamCallbacksAndOptions, - createCallbacksTransformer, - readableFromAsyncIterable, -} from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import { ChatResponse } from 'ollama/browser'; +import { ChatStreamCallbacks } from '@/libs/agent-runtime'; import { nanoid } from '@/utils/uuid'; -import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from './protocol'; +import { + StreamProtocolChunk, + StreamStack, + createCallbacksTransformer, + createSSEProtocolTransformer, +} from './protocol'; const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices @@ -26,11 +28,11 @@ const chatStreamable = async function* (stream: AsyncIterable) { export const OllamaStream = ( res: AsyncIterable, - cb?: AIStreamCallbacksAndOptions, + cb?: ChatStreamCallbacks, ): ReadableStream => { const streamStack: StreamStack = { id: 'chat_' + nanoid() }; return readableFromAsyncIterable(chatStreamable(res)) .pipeThrough(createSSEProtocolTransformer(transformOllamaStream, streamStack)) - .pipeThrough(createCallbacksTransformer(cb) as any); + .pipeThrough(createCallbacksTransformer(cb)); }; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 908c6575a6fd..3867bc01198c 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -1,4 +1,4 @@ -import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; @@ -7,6 +7,7 @@ 
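Note on the pattern above: after this refactor every provider stream is built the same way. A provider-specific transformer maps native chunks onto the SSE protocol frames (`event: ...` / `data: ...`), and the locally defined `createCallbacksTransformer` (see the `protocol.ts` hunk that follows) is piped on last to watch those frames and fire the `ChatStreamCallbacks`. A rough consumer-side sketch (the `OllamaStream` import path is assumed from the streams barrel added earlier in this patch series):

```ts
import { ChatStreamCallbacks } from '@/libs/agent-runtime';
// assumed barrel export; adjust the path if OllamaStream is not re-exported there
import { OllamaStream } from '@/libs/agent-runtime/utils/streams';

const callbacks: ChatStreamCallbacks = {
  onStart: () => console.log('stream opened'),
  // `onText` receives the raw `data:` payload of a text frame (a JSON-encoded string)...
  onText: (text) => console.log('text frame:', text),
  // ...while `onToken` receives the decoded token itself
  onToken: (token) => process.stdout.write(token),
  onToolCall: () => console.log('tool_calls frame seen'),
  onFinal: (completion) => console.log('aggregated:', completion),
};

// `res` is the async-iterable chat response from the ollama browser client
export const toSSEResponse = (res: AsyncIterable<any>) =>
  new Response(OllamaStream(res, callbacks), {
    headers: { 'content-type': 'text/event-stream' },
  });
```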
import { StreamProtocolChunk, StreamProtocolToolCallChunk, StreamToolCallChunkData, + createCallbacksTransformer, createSSEProtocolTransformer, generateToolCallId, } from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 408220fbb77d..f78e78ce920a 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,3 +1,5 @@ +import { ChatStreamCallbacks } from '@/libs/agent-runtime'; + export interface StreamStack { id: string; } @@ -47,3 +49,52 @@ export const createSSEProtocolTransformer = ( controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); }, }); + +export function createCallbacksTransformer(cb: ChatStreamCallbacks | undefined) { + const textEncoder = new TextEncoder(); + let aggregatedResponse = ''; + let currentType = ''; + const callbacks = cb || {}; + + return new TransformStream({ + async flush(): Promise<void> { + if (callbacks.onCompletion) { + await callbacks.onCompletion(aggregatedResponse); + } + + if (callbacks.onFinal) { + await callbacks.onFinal(aggregatedResponse); + } + }, + + async start(): Promise<void> { + if (callbacks.onStart) await callbacks.onStart(); + }, + + async transform(chunk: string, controller): Promise<void> { + controller.enqueue(textEncoder.encode(chunk)); + + // track the type of the chunk + if (chunk.startsWith('event:')) { + currentType = chunk.split('event:')[1].trim(); + } + // if the message is a data chunk, handle the callback + else if (chunk.startsWith('data:')) { + const content = chunk.split('data:')[1].trim(); + + switch (currentType) { + case 'text': { + await callbacks.onText?.(content); + await callbacks.onToken?.(JSON.parse(content)); + break; + } + + case 'tool_calls': { + // TODO: make on ToolCall callback + await callbacks.onToolCall?.(); + } + } + } + }, + }); +} From d4965b293f1a6a4d6d0b448473d01d91632fa45d Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 10:13:04 +0000 Subject: [PATCH 20/24] =?UTF-8?q?=20=E2=9C=85=20test:=20add=20tests=20for?= =?UTF-8?q?=20runtime?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../agent-runtime/anthropic/index.test.ts | 195 +++++++++++++++++ src/libs/agent-runtime/google/index.test.ts | 154 ++++++++++++++ src/libs/agent-runtime/google/index.ts | 8 +- src/libs/agent-runtime/groq/index.test.ts | 39 ++++ src/libs/agent-runtime/ollama/index.test.ts | 197 ++++++++++++++++++ .../openaiCompatibleFactory/index.test.ts | 155 +++++++++++++- 6 files changed, 733 insertions(+), 15 deletions(-) create mode 100644 src/libs/agent-runtime/ollama/index.test.ts diff --git a/src/libs/agent-runtime/anthropic/index.test.ts b/src/libs/agent-runtime/anthropic/index.test.ts index face8a0f3d6a..2c42c29cd859 100644 --- a/src/libs/agent-runtime/anthropic/index.test.ts +++ b/src/libs/agent-runtime/anthropic/index.test.ts @@ -1,6 +1,9 @@ // @vitest-environment node import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { ChatCompletionTool } from '@/libs/agent-runtime'; + +import * as anthropicHelpers from '../utils/anthropicHelpers'; import * as debugStreamModule from '../utils/debugStream'; import { LobeAnthropicAI } from './index'; @@ -16,6 +19,10 @@ beforeEach(() => { // 使用 vi.spyOn 来模拟 chat.completions.create 方法 vi.spyOn(instance['client'].messages, 'create').mockReturnValue(new ReadableStream() as any); + + vi.spyOn(instance['client'].beta.tools.messages, 'create').mockReturnValue({ + content: [], + } 
as any); }); afterEach(() => { @@ -233,6 +240,54 @@ describe('LobeAnthropicAI', () => { process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION = originalDebugValue; }); + describe('chat with tools', () => { + it('should call client.beta.tools.messages.create when tools are provided', async () => { + // Arrange + const tools: ChatCompletionTool[] = [ + { function: { name: 'tool1', description: 'desc1' }, type: 'function' }, + ]; + const spyOn = vi.spyOn(anthropicHelpers, 'buildAnthropicTools'); + + // Act + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + tools, + }); + + // Assert + expect(instance['client'].beta.tools.messages.create).toHaveBeenCalled(); + expect(spyOn).toHaveBeenCalledWith(tools); + }); + + it('should handle text and tool_use content correctly in transformResponseToStream', async () => { + // Arrange + const mockResponse = { + content: [ + { type: 'text', text: 'Hello' }, + { type: 'tool_use', id: 'tool1', name: 'tool1', input: 'input1' }, + ], + }; + // @ts-ignore + vi.spyOn(instance, 'transformResponseToStream').mockReturnValue(new ReadableStream()); + vi.spyOn(instance['client'].beta.tools.messages, 'create').mockResolvedValue( + mockResponse as any, + ); + + // Act + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 0, + tools: [{ function: { name: 'tool1', description: 'desc1' }, type: 'function' }], + }); + + // Assert + expect(instance['transformResponseToStream']).toHaveBeenCalledWith(mockResponse); + }); + }); + describe('Error', () => { it('should throw InvalidAnthropicAPIKey error on API_KEY_INVALID error', async () => { // Arrange @@ -305,5 +360,145 @@ describe('LobeAnthropicAI', () => { } }); }); + + describe('Error handling', () => { + it('should throw LocationNotSupportError on 403 error', async () => { + // Arrange + const apiError = { status: 403 }; + (instance['client'].messages.create as Mock).mockRejectedValue(apiError); + + // Act & Assert + await expect( + instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }), + ).rejects.toEqual({ + endpoint: 'https://api.anthropic.com', + error: apiError, + errorType: 'LocationNotSupportError', + provider, + }); + }); + + it('should throw AnthropicBizError on other error status codes', async () => { + // Arrange + const apiError = { status: 500 }; + (instance['client'].messages.create as Mock).mockRejectedValue(apiError); + + // Act & Assert + await expect( + instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }), + ).rejects.toEqual({ + endpoint: 'https://api.anthropic.com', + error: apiError, + errorType: 'AnthropicBizError', + provider, + }); + }); + + it('should desensitize custom baseURL in error message', async () => { + // Arrange + const apiError = { status: 401 }; + const customInstance = new LobeAnthropicAI({ + apiKey: 'test', + baseURL: 'https://api.custom.com/v1', + }); + vi.spyOn(customInstance['client'].messages, 'create').mockRejectedValue(apiError); + + // Act & Assert + await expect( + customInstance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 0, + }), + ).rejects.toEqual({ + endpoint: 'https://api.cu****om.com/v1', + error: apiError, + errorType: 'InvalidAnthropicAPIKey', + provider, + }); + }); + }); + + describe('Options', () => { + it('should pass signal 
to API call', async () => { + // Arrange + const controller = new AbortController(); + + // Act + await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }, + { signal: controller.signal }, + ); + + // Assert + expect(instance['client'].messages.create).toHaveBeenCalledWith( + expect.objectContaining({}), + { signal: controller.signal }, + ); + }); + + it('should apply callback to the returned stream', async () => { + // Arrange + const callback = vi.fn(); + + // Act + await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 0, + }, + { + callback: { onStart: callback }, + }, + ); + + // Assert + expect(callback).toHaveBeenCalled(); + }); + + it('should set headers on the response', async () => { + // Arrange + const headers = { 'X-Test-Header': 'test' }; + + // Act + const result = await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }, + { headers }, + ); + + // Assert + expect(result.headers.get('X-Test-Header')).toBe('test'); + }); + }); + + describe('Edge cases', () => { + it('should handle empty messages array', async () => { + // Act & Assert + await expect( + instance.chat({ + messages: [], + model: 'claude-3-haiku-20240307', + temperature: 1, + }), + ).resolves.toBeInstanceOf(Response); + }); + }); }); }); diff --git a/src/libs/agent-runtime/google/index.test.ts b/src/libs/agent-runtime/google/index.test.ts index 323738480730..0fc4a34179c8 100644 --- a/src/libs/agent-runtime/google/index.test.ts +++ b/src/libs/agent-runtime/google/index.test.ts @@ -1,4 +1,6 @@ // @vitest-environment edge-runtime +import { FunctionDeclarationSchemaType } from '@google/generative-ai'; +import { JSONSchema7 } from 'json-schema'; import OpenAI from 'openai'; import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; @@ -426,5 +428,157 @@ describe('LobeGoogleAI', () => { expect(model).toEqual('gemini-pro-vision'); }); }); + + describe('buildGoogleTools', () => { + it('should return undefined when tools is undefined or empty', () => { + expect(instance['buildGoogleTools'](undefined)).toBeUndefined(); + expect(instance['buildGoogleTools']([])).toBeUndefined(); + }); + + it('should correctly convert ChatCompletionTool to GoogleFunctionCallTool', () => { + const tools: OpenAI.ChatCompletionTool[] = [ + { + function: { + name: 'testTool', + description: 'A test tool', + parameters: { + type: 'object', + properties: { + param1: { type: 'string' }, + param2: { type: 'number' }, + }, + required: ['param1'], + }, + }, + type: 'function', + }, + ]; + + const googleTools = instance['buildGoogleTools'](tools); + + expect(googleTools).toHaveLength(1); + expect(googleTools![0].functionDeclarations![0]).toEqual({ + name: 'testTool', + description: 'A test tool', + parameters: { + type: FunctionDeclarationSchemaType.OBJECT, + properties: { + param1: { type: FunctionDeclarationSchemaType.STRING }, + param2: { type: FunctionDeclarationSchemaType.NUMBER }, + }, + required: ['param1'], + }, + }); + }); + }); + + describe('convertSchemaObject', () => { + it('should correctly convert object schema', () => { + const schema: JSONSchema7 = { + type: 'object', + properties: { + prop1: { type: 'string' }, + prop2: { type: 'number' }, + }, + }; + + const converted = instance['convertSchemaObject'](schema); + + expect(converted).toEqual({ + type: FunctionDeclarationSchemaType.OBJECT, + 
properties: { + prop1: { type: FunctionDeclarationSchemaType.STRING }, + prop2: { type: FunctionDeclarationSchemaType.NUMBER }, + }, + }); + }); + + // Similar test cases can be added for the array/string/number/boolean schema types + // ... + + it('should correctly convert nested schema', () => { + const schema: JSONSchema7 = { + type: 'object', + properties: { + nested: { + type: 'array', + items: { + type: 'object', + properties: { + prop: { type: 'string' }, + }, + }, + }, + }, + }; + + const converted = instance['convertSchemaObject'](schema); + + expect(converted).toEqual({ + type: FunctionDeclarationSchemaType.OBJECT, + properties: { + nested: { + type: FunctionDeclarationSchemaType.ARRAY, + items: { + type: FunctionDeclarationSchemaType.OBJECT, + properties: { + prop: { type: FunctionDeclarationSchemaType.STRING }, + }, + }, + }, + }, + }); + }); + }); + + describe('convertOAIMessagesToGoogleMessage', () => { + it('should correctly convert assistant message', () => { + const message: OpenAIChatMessage = { + role: 'assistant', + content: 'Hello', + }; + + const converted = instance['convertOAIMessagesToGoogleMessage'](message); + + expect(converted).toEqual({ + role: 'model', + parts: [{ text: 'Hello' }], + }); + }); + + it('should correctly convert user message', () => { + const message: OpenAIChatMessage = { + role: 'user', + content: 'Hi', + }; + + const converted = instance['convertOAIMessagesToGoogleMessage'](message); + + expect(converted).toEqual({ + role: 'user', + parts: [{ text: 'Hi' }], + }); + }); + + it('should correctly convert message with content parts', () => { + const message: OpenAIChatMessage = { + role: 'user', + content: [ + { type: 'text', text: 'Check this image:' }, + { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }, + ], + }; + + const converted = instance['convertOAIMessagesToGoogleMessage'](message); + + expect(converted).toEqual({ + role: 'user', + parts: [ + { text: 'Check this image:' }, + { inlineData: { data: '...', mimeType: 'image/png' } }, + ], + }); + }); + }); }); }); diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index e9984be47799..cf4c0689ac9b 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -247,11 +247,7 @@ export class LobeGoogleAI implements LobeRuntimeAI { return [ { - functionDeclarations: tools.map((tool) => { - const t = this.convertToolToGoogleTool(tool); - console.log('output Schema', t); - return t; - }), + functionDeclarations: tools.map((tool) => this.convertToolToGoogleTool(tool)), }, ]; } @@ -260,8 +256,6 @@ export class LobeGoogleAI implements LobeRuntimeAI { const functionDeclaration = tool.function; const parameters = functionDeclaration.parameters; - console.log('input Schema', JSON.stringify(parameters, null, 2)); - return { description: functionDeclaration.description, name: functionDeclaration.name, diff --git a/src/libs/agent-runtime/groq/index.test.ts b/src/libs/agent-runtime/groq/index.test.ts index e8363720fdb0..fda104e1faa2 100644 --- a/src/libs/agent-runtime/groq/index.test.ts +++ b/src/libs/agent-runtime/groq/index.test.ts @@ -71,6 +71,45 @@ describe('LobeGroqAI', () => { expect(result).toBeInstanceOf(Response); }); + describe('handlePayload option', () => { + it('should set stream to false when payload contains tools', async () => { + const mockCreateMethod = vi + .spyOn(instance['client'].chat.completions, 'create') + .mockResolvedValue({ + id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', + object: 'chat.completion', + created: 
1709125675, + model: 'mistralai/mistral-7b-instruct:free', + system_fingerprint: 'fp_86156a94a0', + choices: [ + { + index: 0, + message: { role: 'assistant', content: 'hello' }, + logprobs: null, + finish_reason: 'stop', + }, + ], + }); + + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + tools: [ + { + type: 'function', + function: { name: 'tool1', description: '', parameters: {} }, + }, + ], + }); + + expect(mockCreateMethod).toHaveBeenCalledWith( + expect.objectContaining({ stream: false }), + expect.anything(), + ); + }); + }); + describe('Error', () => { it('should return OpenRouterBizError with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange diff --git a/src/libs/agent-runtime/ollama/index.test.ts b/src/libs/agent-runtime/ollama/index.test.ts new file mode 100644 index 000000000000..ca9596123ee1 --- /dev/null +++ b/src/libs/agent-runtime/ollama/index.test.ts @@ -0,0 +1,197 @@ +import { Ollama } from 'ollama/browser'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { AgentRuntimeErrorType } from '../error'; +import { ModelProvider } from '../types'; +import { AgentRuntimeError } from '../utils/createError'; +import { LobeOllamaAI } from './index'; + +vi.mock('ollama/browser'); + +describe('LobeOllamaAI', () => { + let ollamaAI: LobeOllamaAI; + + beforeEach(() => { + ollamaAI = new LobeOllamaAI({ baseURL: 'https://example.com' }); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + describe('constructor', () => { + it('should initialize Ollama client and baseURL with valid baseURL', () => { + expect(ollamaAI['client']).toBeInstanceOf(Ollama); + expect(ollamaAI.baseURL).toBe('https://example.com'); + }); + + it('should throw AgentRuntimeError with invalid baseURL', () => { + try { + new LobeOllamaAI({ baseURL: 'invalid-url' }); + } catch (e) { + expect(e).toEqual(AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidOllamaArgs)); + } + }); + }); + + describe('chat', () => { + it('should call Ollama client chat method and return StreamingResponse', async () => { + const chatMock = vi.fn().mockResolvedValue({}); + vi.mocked(Ollama.prototype.chat).mockImplementation(chatMock); + + const payload = { + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + }; + const options = { signal: new AbortController().signal }; + + const response = await ollamaAI.chat(payload as any, options); + + expect(chatMock).toHaveBeenCalledWith({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + options: { + frequency_penalty: undefined, + presence_penalty: undefined, + temperature: undefined, + top_p: undefined, + }, + stream: true, + }); + expect(response).toBeInstanceOf(Response); + }); + + it('should throw AgentRuntimeError when Ollama client chat method throws an error', async () => { + const errorMock = { + message: 'Chat error', + name: 'ChatError', + status_code: 500, + }; + vi.mocked(Ollama.prototype.chat).mockRejectedValue(errorMock); + + const payload = { + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + }; + + try { + await ollamaAI.chat(payload as any); + } catch (e) { + expect(e).toEqual( + AgentRuntimeError.chat({ + error: errorMock, + errorType: AgentRuntimeErrorType.OllamaBizError, + provider: ModelProvider.Ollama, + }), + ); + } + }); + + it('should abort the request when signal aborts', async () => { + const abortMock = vi.fn(); + 
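// a sketch of the assumed wiring inside LobeOllamaAI (not verified here):
+      // the runtime presumably registers an 'abort' listener on the provided
+      // signal, e.g. options?.signal?.addEventListener('abort', () => this.client.abort()),
+      // so dispatching 'abort' below should surface as a call to client.abort().
+      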
vi.mocked(Ollama.prototype.abort).mockImplementation(abortMock); + + const payload = { + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + }; + const options = { signal: new AbortController().signal }; + + ollamaAI.chat(payload as any, options); + + options.signal.dispatchEvent(new Event('abort')); + + expect(abortMock).toHaveBeenCalled(); + }); + }); + + describe('models', () => { + it('should call Ollama client list method and return ChatModelCard array', async () => { + const listMock = vi.fn().mockResolvedValue({ + models: [{ name: 'model-1' }, { name: 'model-2' }], + }); + vi.mocked(Ollama.prototype.list).mockImplementation(listMock); + + const models = await ollamaAI.models(); + + expect(listMock).toHaveBeenCalled(); + expect(models).toEqual([{ id: 'model-1' }, { id: 'model-2' }]); + }); + }); + + describe('buildOllamaMessages', () => { + it('should convert OpenAIChatMessage array to OllamaMessage array', () => { + const messages = [ + { content: 'Hello', role: 'user' }, + { content: 'Hi there!', role: 'assistant' }, + ]; + + const ollamaMessages = ollamaAI['buildOllamaMessages'](messages as any); + + expect(ollamaMessages).toEqual([ + { content: 'Hello', role: 'user' }, + { content: 'Hi there!', role: 'assistant' }, + ]); + }); + }); + + describe('convertContentToOllamaMessage', () => { + it('should convert string content to OllamaMessage', () => { + const message = { content: 'Hello', role: 'user' }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ content: 'Hello', role: 'user' }); + }); + + it('should convert text content to OllamaMessage', () => { + const message = { + content: [{ type: 'text', text: 'Hello' }], + role: 'user', + }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ content: 'Hello', role: 'user' }); + }); + + it('should convert image_url content to OllamaMessage with images', () => { + const message = { + content: [ + { + type: 'image_url', + image_url: { url: 'data:image/png;base64,abc123' }, + }, + ], + role: 'user', + }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ + content: '', + role: 'user', + images: ['abc123'], + }); + }); + + it('should ignore invalid image_url content', () => { + const message = { + content: [ + { + type: 'image_url', + image_url: { url: 'invalid-url' }, + }, + ], + role: 'user', + }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ + content: '', + role: 'user', + }); + }); + }); +}); diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts index 621bd94f8583..df2ae76aed44 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts @@ -30,13 +30,6 @@ const LobeMockProvider = LobeOpenAICompatibleFactory({ if (error.status === 403) return { error, errorType: AgentRuntimeErrorType.LocationNotSupportError }; }, - handlePayload: (payload) => { - return { - ...payload, - // disable stream for tools due to groq dont support - stream: !payload.tools, - } as any; - }, }, debug: { chatCompletion: () => process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION === '1', @@ -71,7 +64,7 @@ describe('LobeOpenAICompatibleFactory', () => { }); describe('chat', () => { - 
it('should return a StreamingTextResponse on successful API call', async () => {
+    it('should return a Response on successful API call', async () => {
       // Arrange
       const mockStream = new ReadableStream();
       const mockResponse = Promise.resolve(mockStream);
@@ -120,6 +113,131 @@ describe('LobeOpenAICompatibleFactory', () => {
       expect(result).toBeInstanceOf(Response);
     });
 
+    describe('streaming response', () => {
+      it('should handle multiple data chunks correctly', async () => {
+        const mockStream = new ReadableStream({
+          start(controller) {
+            controller.enqueue({
+              id: 'a',
+              object: 'chat.completion.chunk',
+              created: 1709125675,
+              model: 'mistralai/mistral-7b-instruct:free',
+              system_fingerprint: 'fp_86156a94a0',
+              choices: [
+                { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null },
+              ],
+            });
+            controller.close();
+          },
+        });
+        vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
+          mockStream as any,
+        );
+
+        const result = await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+
+        const decoder = new TextDecoder();
+        const reader = result.body!.getReader();
+        expect(decoder.decode((await reader.read()).value)).toEqual('id: a\n');
+        expect(decoder.decode((await reader.read()).value)).toEqual('event: text\n');
+        expect(decoder.decode((await reader.read()).value)).toEqual('data: "hello"\n\n');
+        expect((await reader.read()).done).toBe(true);
+      });
+
+      it('should transform non-streaming response to stream correctly', async () => {
+        const mockResponse: OpenAI.ChatCompletion = {
+          id: 'a',
+          object: 'chat.completion',
+          created: 123,
+          model: 'mistralai/mistral-7b-instruct:free',
+          choices: [
+            {
+              index: 0,
+              message: { role: 'assistant', content: 'Hello' },
+              finish_reason: 'stop',
+              logprobs: null,
+            },
+          ],
+          usage: {
+            prompt_tokens: 5,
+            completion_tokens: 5,
+            total_tokens: 10,
+          },
+        };
+        vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
+          mockResponse as any,
+        );
+
+        const result = await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+          stream: false,
+        });
+
+        const decoder = new TextDecoder();
+
+        const reader = result.body!.getReader();
+        expect(decoder.decode((await reader.read()).value)).toContain('id: a\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('event: text\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('data: "Hello"\n\n');
+
+        expect(decoder.decode((await reader.read()).value)).toContain('id: a\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('event: text\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('');
+
+        expect((await reader.read()).done).toBe(true);
+      });
+    });
+
+    describe('handlePayload option', () => {
+      it('should modify request payload correctly', async () => {
+        const mockCreateMethod = vi.spyOn(instance['client'].chat.completions, 'create');
+
+        await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+
+        expect(mockCreateMethod).toHaveBeenCalledWith(
+          expect.objectContaining({
+            // add assertions here based on the actual handlePayload implementation
+          }),
+          expect.anything(),
+        );
+      });
+    });
+
+    describe('cancel request', () => {
+      it('should cancel ongoing request correctly', async () => {
+        const controller = new AbortController();
+        const mockCreateMethod = 
vi.spyOn(instance['client'].chat.completions, 'create'); + + instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }, + { signal: controller.signal }, + ); + + controller.abort(); + + expect(mockCreateMethod).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + signal: controller.signal, + }), + ); + }); + }); + describe('Error', () => { it('should return bizErrorType with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange @@ -233,6 +351,27 @@ describe('LobeOpenAICompatibleFactory', () => { } }); + describe('handleError option', () => { + it('should return correct error type for 403 status code', async () => { + const error = { status: 403 }; + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(error); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + error, + errorType: AgentRuntimeErrorType.LocationNotSupportError, + provider, + }); + } + }); + }); + it('should throw an InvalidOpenRouterAPIKey error type on 401 status code', async () => { // Mock the API call to simulate a 401 error const error = new Error('Unauthorized') as any; From b047563456803ae06bc9b859ad85e60314ff63c8 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Sat, 11 May 2024 20:25:31 +0800 Subject: [PATCH 21/24] =?UTF-8?q?=E2=9C=85=20test:=20add=20more=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../__snapshots__/index.test.ts.snap | 886 ++ .../togetherai/fixtures/models.json | 8111 +++++++++++++++++ .../agent-runtime/togetherai/index.test.ts | 16 +- .../utils/anthropicHelpers.test.ts | 119 + .../agent-runtime/utils/debugStream.test.ts | 70 + .../utils/streams/anthropic.test.ts | 197 + .../utils/streams/bedrock/llama.test.ts | 196 + .../utils/streams/google-ai.test.ts | 97 + .../utils/streams/ollama.test.ts | 77 + .../utils/streams/openai.test.ts | 263 + src/store/tool/slices/builtin/action.test.ts | 90 + src/utils/fetch.test.ts | 155 +- src/utils/fetch.ts | 3 +- 13 files changed, 10277 insertions(+), 3 deletions(-) create mode 100644 src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap create mode 100644 src/libs/agent-runtime/togetherai/fixtures/models.json create mode 100644 src/libs/agent-runtime/utils/debugStream.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/anthropic.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/google-ai.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/ollama.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/openai.test.ts create mode 100644 src/store/tool/slices/builtin/action.test.ts diff --git a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap new file mode 100644 index 000000000000..fb029458090a --- /dev/null +++ b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap @@ -0,0 +1,886 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`LobeTogetherAI > models > should get models 1`] = ` +[ + { + "description": "This model is a 75/25 merge of Chronos (13B) and Nous Hermes (13B) models resulting in having a great ability to produce evocative storywriting and follow a 
narrative.", + "displayName": "Chronos Hermes (13B)", + "enabled": false, + "functionCall": false, + "id": "Austism/chronos-hermes-13b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "MythoLogic-L2 and Huginn merge using a highly experimental tensor type merge technique. The main difference with MythoMix is that I allowed more of Huginn to intermingle with the single tensors located at the front and end of a model", + "displayName": "MythoMax-L2 (13B)", + "enabled": false, + "functionCall": false, + "id": "Gryphe/MythoMax-L2-13b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "first Nous collection of dataset and models made by fine-tuning mostly on data created by Nous in-house", + "displayName": "Nous Capybara v1.9 (7B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Capybara-7B-V1p9", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Nous Hermes 2 on Mistral 7B DPO is the new flagship 7B Hermes! This model was DPO'd from Teknium/OpenHermes-2.5-Mistral-7B and has improved across the board on all benchmarks tested - AGIEval, BigBench Reasoning, GPT4All, and TruthfulQA.", + "displayName": "Nous Hermes 2 - Mistral DPO (7B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "displayName": "Nous Hermes 2 - Mixtral 8x7B-DPO ", + "enabled": true, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Nous Hermes 2 Mixtral 7bx8 SFT is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. 
The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "displayName": "Nous Hermes 2 - Mixtral 8x7B-SFT", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune", + "displayName": "Nous Hermes-2 Yi (34B)", + "enabled": true, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Yi-34B", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "displayName": "Nous Hermes Llama-2 (13B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-Llama2-13b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Nous-Hermes-Llama2-7b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "displayName": "Nous Hermes LLaMA-2 (7B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-llama-2-7b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "An OpenOrca dataset fine-tune on top of Mistral 7B by the OpenOrca team.", + "displayName": "OpenOrca Mistral (7B) 8K", + "enabled": false, + "functionCall": false, + "id": "Open-Orca/Mistral-7B-OpenOrca", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (0.5B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-0.5B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (1.8B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-1.8B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (110B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-110B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (14B)", + "enabled": true, + "functionCall": false, + "id": "Qwen/Qwen1.5-14B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (32B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-32B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (4B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-4B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (72B)", + "enabled": true, + "functionCall": false, + "id": "Qwen/Qwen1.5-72B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (7B)", + "enabled": true, + "functionCall": false, + "id": "Qwen/Qwen1.5-7B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Arctic is a dense-MoE Hybrid transformer architecture pre-trained from scratch by the Snowflake AI Research Team.", + "displayName": "Snowflake Arctic Instruct", + "enabled": false, + "functionCall": false, + "id": "Snowflake/snowflake-arctic-instruct", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Re:MythoMax (ReMM) is a recreation trial of the original MythoMax-L2-B13 with updated models. 
This merge use SLERP [TESTING] to merge ReML and Huginn v1.2.", + "displayName": "ReMM SLERP L2 (13B)", + "enabled": false, + "functionCall": false, + "id": "Undi95/ReMM-SLERP-L2-13B", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "A merge of models built by Undi95 with the new task_arithmetic merge method from mergekit.", + "displayName": "Toppy M (7B)", + "enabled": false, + "functionCall": false, + "id": "Undi95/Toppy-M-7B", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities", + "displayName": "WizardLM v1.2 (13B)", + "enabled": false, + "functionCall": false, + "id": "WizardLM/WizardLM-13B-V1.2", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "The OLMo models are trained on the Dolma dataset", + "displayName": "OLMo Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "allenai/OLMo-7B-Instruct", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (13B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-13b-Instruct-hf", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (34B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-34b-Instruct-hf", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (70B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-70b-Instruct-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-7b-Instruct-hf", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "This Dolphin is really good at coding, I trained with a lot of coding data. It is very obedient but it is not DPO tuned - so you still might need to encourage it in the system prompt as I show in the below examples.", + "displayName": "Dolphin 2.5 Mixtral 8x7b", + "enabled": false, + "functionCall": false, + "id": "cognitivecomputations/dolphin-2.5-mixtral-8x7b", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "DBRX Instruct is a mixture-of-experts (MoE) large language model trained from scratch by Databricks. 
DBRX Instruct specializes in few-turn interactions.", + "displayName": "DBRX Instruct", + "enabled": false, + "functionCall": false, + "id": "databricks/dbrx-instruct", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.", + "displayName": "Deepseek Coder Instruct (33B)", + "enabled": true, + "functionCall": false, + "id": "deepseek-ai/deepseek-coder-33b-instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese", + "displayName": "DeepSeek LLM Chat (67B)", + "enabled": false, + "functionCall": false, + "id": "deepseek-ai/deepseek-llm-67b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "An instruction fine-tuned LLaMA-2 (70B) model by merging Platypus2 (70B) by garage-bAInd and LLaMA-2 Instruct v2 (70B) by upstage.", + "displayName": "Platypus2 Instruct (70B)", + "enabled": false, + "functionCall": false, + "id": "garage-bAInd/Platypus2-70B-instruct", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "displayName": "Gemma Instruct (2B)", + "enabled": true, + "functionCall": false, + "id": "google/gemma-2b-it", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "displayName": "Gemma Instruct (7B)", + "enabled": true, + "functionCall": false, + "id": "google/gemma-7b-it", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna v1.5 (13B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-13b-v1.5", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna v1.5 (7B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-7b-v1.5", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (13B)", + "enabled": true, + "functionCall": false, + "id": "meta-llama/Llama-2-13b-chat-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (70B)", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-2-70b-chat-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-2-7b-chat-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Meta Llama 3 70B Instruct", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-3-70b-chat-hf", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Meta Llama 3 8B Instruct", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-3-8b-chat-hf", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "WizardLM-2 8x22B is Wizard's most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models.", + "displayName": "WizardLM-2 (8x22B)", + "enabled": false, + "functionCall": false, + "id": "microsoft/WizardLM-2-8x22B", + "maxOutput": 65536, + "tokens": 65536, + "vision": false, + }, + { + "description": "instruct fine-tuned version of Mistral-7B-v0.1", + "displayName": "Mistral (7B) Instruct", + "enabled": false, + "functionCall": false, + "id": "mistralai/Mistral-7B-Instruct-v0.1", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.", + "displayName": "Mistral (7B) Instruct v0.2", + "enabled": false, + "functionCall": false, + "id": "mistralai/Mistral-7B-Instruct-v0.2", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.", + "displayName": "Mixtral-8x22B Instruct v0.1", + "enabled": false, + "functionCall": false, + "id": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "maxOutput": 65536, + "tokens": 65536, + "vision": false, + }, + { + "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "displayName": "Mixtral-8x7B Instruct v0.1", + "enabled": true, + "functionCall": false, + "id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "A merge of OpenChat 3.5 was trained with C-RLFT on a collection of publicly available high-quality instruction data, with a custom processing pipeline.", + "displayName": "OpenChat 3.5", + "enabled": false, + "functionCall": false, + "id": "openchat/openchat-3.5-1210", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "A state-of-the-art model by Snorkel AI, DPO fine-tuned on Mistral-7B", + "displayName": "Snorkel Mistral PairRM DPO (7B)", + "enabled": false, + "functionCall": false, + "id": "snorkelai/Snorkel-Mistral-PairRM-DPO", + "maxOutput": 32768, + 
"tokens": 32768, + "vision": false, + }, + { + "description": "State of the art Mistral Fine-tuned on extensive public datasets", + "displayName": "OpenHermes-2-Mistral (7B)", + "enabled": false, + "functionCall": false, + "id": "teknium/OpenHermes-2-Mistral-7B", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Continuation of OpenHermes 2 Mistral model trained on additional code datasets", + "displayName": "OpenHermes-2.5-Mistral (7B)", + "enabled": false, + "functionCall": false, + "id": "teknium/OpenHermes-2p5-Mistral-7B", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations, instruction tuned by Together", + "displayName": "LLaMA-2-7B-32K-Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Llama-2-7B-32K-Instruct", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-7B-v1 base model.", + "displayName": "RedPajama-INCITE Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/RedPajama-INCITE-7B-Chat", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.", + "displayName": "RedPajama-INCITE Chat (3B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", + "displayName": "StripedHyena Nous (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/StripedHyena-Nous-7B", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ", + "displayName": "Alpaca (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/alpaca-7b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "displayName": "Upstage SOLAR Instruct v1 (11B)", + "enabled": false, + "functionCall": false, + "id": "upstage/SOLAR-10.7B-Instruct-v1.0", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "displayName": "01-ai Yi Chat (34B)", + "enabled": true, + "functionCall": false, + "id": "zero-one-ai/Yi-34B-Chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. 
The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Llama3 8B Chat HF INT4", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Llama-3-8b-chat-hf-int4", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Togethercomputer Llama3 8B Instruct Int8", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Llama-3-8b-chat-hf-int8", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.", + "displayName": "Pythia-Chat-Base (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Pythia-Chat-Base-7B-v0.16", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "displayName": "MPT-Chat (30B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/mpt-30b-chat", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "displayName": "Koala (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Koala-7B", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "displayName": "Dolly v2 (12B)", + "enabled": false, + "functionCall": false, + "id": "databricks/dolly-v2-12b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "displayName": "Dolly v2 (3B)", + "enabled": false, + "functionCall": false, + "id": "databricks/dolly-v2-3b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "displayName": "Guanaco (65B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-65b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna-FastChat-T5 (3B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/fastchat-t5-3b-v1.0", + "maxOutput": 512, + "tokens": 512, + "vision": false, + }, + { + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. 
", + "displayName": "Open-Assistant StableLM SFT-7 (7B)", + "enabled": false, + "functionCall": false, + "id": "OpenAssistant/stablelm-7b-sft-v7-epoch-3", + "maxOutput": 4096, + "tokens": 4096, + "vision": true, + }, + { + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "displayName": "MPT-Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/mpt-7b-chat", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", + "displayName": "Open-Assistant Pythia SFT-4 (12B)", + "enabled": false, + "functionCall": false, + "id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + "maxOutput": 2048, + "tokens": 2048, + "vision": true, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", + "displayName": "Vicuna v1.3 (7B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-7b-v1.3", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "displayName": "Nous Hermes LLaMA-2 (70B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-Llama2-70b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna v1.5 16K (13B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-13b-v1.5-16k", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.", + "displayName": "GPT-NeoXT-Chat-Base (20B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.", + "displayName": "Zephyr-7B-ß", + "enabled": false, + "functionCall": false, + "id": "HuggingFaceH4/zephyr-7b-beta", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/CodeLlama-7b-Instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "displayName": "Guanaco (13B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-13b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (70B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/llama-2-70b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (34B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/CodeLlama-34b-Instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (13B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/CodeLlama-13b-Instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (13B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/llama-2-13b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", + "displayName": "Vicuna v1.3 (13B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-13b-v1.3", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Fine-tuned from StarCoder to act as a helpful coding assistant. As an alpha release is only intended for educational or research purpopses.", + "displayName": "StarCoderChat Alpha (16B)", + "enabled": false, + "functionCall": false, + "id": "HuggingFaceH4/starchat-alpha", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "displayName": "Dolly v2 (7B)", + "enabled": false, + "functionCall": false, + "id": "databricks/dolly-v2-7b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "displayName": "Guanaco (33B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-33b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "displayName": "Koala (13B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Koala-13B", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/llama-2-7b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "displayName": "Upstage SOLAR Instruct v1 (11B)-Int4", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ", + "displayName": "Guanaco (7B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-7b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, +] +`; diff --git a/src/libs/agent-runtime/togetherai/fixtures/models.json b/src/libs/agent-runtime/togetherai/fixtures/models.json new file mode 100644 index 000000000000..561197f99bb3 --- /dev/null +++ b/src/libs/agent-runtime/togetherai/fixtures/models.json @@ -0,0 +1,8111 @@ +[ + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e831864b84b428b8d322d0", + "name": "Austism/chronos-hermes-13b", + "display_name": "Chronos Hermes (13B)", + "display_type": "chat", + "description": "This model is a 75/25 merge of Chronos (13B) and Nous Hermes (13B) models resulting in having a great ability to produce evocative storywriting and follow a narrative.", + "license": "other", + "creator_organization": "Austism", + "hardware_label": "2x A100 80GB", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x6966f4A2caf8efaE98C251C3C15210333578C158": 1 }, + "asks_updated": "2024-05-11T12:20:53.91543414Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 34.53333333333333, + "throughput_out": 0.5333333333333333, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.043478260869565216, + "qps": 0.06666666666666667, + "throughput_in": 34.53333333333333, + "throughput_out": 0.5333333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6560b993b56cf1e0970c9b1a", + "name": "BAAI/bge-base-en-v1.5", + "display_name": "BAAI-Bge-Base-1p5", + "display_type": "embedding", + "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding", + "license": "MIT", + 
"creator_organization": "BAAI", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 109482240, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-24T14:56:19.475Z", + "update_at": "2023-12-22T03:26:23.802Z", + "instances": [ + { "avzone": "us-central-2a", "cluster": "jollyllama" }, + { "avzone": "us-central-1a", "cluster": "sassyseal" } + ], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x18530141Cf50876b091f3D4B9FA3Bb7F7d24d20a": 1, + "0x4Aa34b8d92E163D7d7527e17B92Bc83C2F7149a3": 1, + "0x8BEE38fD0697C19F06411AaEEea935073005168c": 1, + "0xe2d9B1fd3EfBA3fEB7cfc84FD5d9c1621dA3dEB9": 1 + }, + "asks_updated": "2024-05-11T03:12:34.75168084Z", + "gpus": { "": 0 }, + "qps": 3.0666666666666664, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 254, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.008075842696629214, + "qps": 1.7333333333333334, + "throughput_in": 137.2, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.008046875, + "qps": 1.3333333333333333, + "throughput_in": 116.8, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6560b938b56cf1e0970c9b19", + "name": "BAAI/bge-large-en-v1.5", + "display_name": "BAAI-Bge-Large-1p5", + "display_type": "embedding", + "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding", + "license": "MIT", + "creator_organization": "BAAI", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 335141888, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 4, "output": 4, "finetune": 0, "base": 0 }, + "created_at": "2023-11-24T14:54:48.986Z", + "update_at": "2023-12-22T03:27:18.465Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x5ED0BA75594E3429628087603D628838bE686ebF": 1, + "0x7153b499cA3C6cc2Bb60Dd5DBF8ba0C6B2532c63": 1, + "0xD2a55c4769d98e7Df019A3858FA37036BbbAB5cE": 1, + "0xF6122ecAc4D8d96a95E00d6eC8a838f4525D8124": 1 + }, + "asks_updated": "2024-05-11T03:00:56.495347114Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f78861d683768020b9f005", + "name": "Gryphe/MythoMax-L2-13b", + "display_name": "MythoMax-L2 (13B)", + "display_type": "chat", + "description": "MythoLogic-L2 and Huginn merge using a highly experimental tensor type merge technique. 
The main difference with MythoMix is that I allowed more of Huginn to intermingle with the single tensors located at the front and end of a model", + "license": "other", + "creator_organization": "Gryphe", + "hardware_label": "1x A40 48GB", + "num_parameters": 13000000000, + "release_date": "2023-08-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": [""], + "add_generation_prompt": true, + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-05T19:58:25.683Z", + "update_at": "2023-09-05T19:58:25.683Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 30, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x007fAfa7e8774c40929B946474B0de5288eC6C41": 1, + "0x037DBdcEDb5C34a4fcB41Ab8AaD56b5815bE02DE": 1, + "0x05a4E02cc4748e92338DCE88e22D81374fD300C9": 1, + "0x17957d0c98323Cec3B42BA4a5C0503C5B7114317": 1, + "0x1C28d22406B7acff59f57120DcF98685fed4E6d1": 1, + "0x2Da6d7d2f5810221C572Dea0A4C56D117913ba60": 1, + "0x2F84CaD2c29FAf002787cBc27A7749871dB843F5": 1, + "0x50CA731E79882f073e0550c7B4177EF21A20226b": 1, + "0x705CE19b5A6BfA9739Ce9160B1DCcaD9c83D9D7e": 1, + "0x7101FDCAa53c7E8fF969F4A5Bab72311A9f1a1cf": 1, + "0x7986A72CA1d6dE9bD9b1e0ec349a13c92678193b": 1, + "0x80Ec6D391649f097c1af115be95f5e67EDD4C86E": 1, + "0x80c2a4602548641b57f48504Ac182e13b2895b87": 1, + "0x844EE8641055BDc3A4D448782E0B2e582688cF7c": 1, + "0x866abAD0f44b6C608DF925b864d73D0b0eCb6FAb": 1, + "0x8993bDAC643F3500a20c0DdA18af1f6535840aF6": 1, + "0x8ef1AD0c945EDD56CE215c751c4d59BE6e7Ba8E5": 1, + "0x9C10b5fe06098EE4475c055A598b03D8AE228B1B": 1, + "0x9D76E8FD91d1Ccf7B19e1AbE10144f2721eA5E8F": 1, + "0xA059d967aFA12e9B85eC2ABF7930D09aefe789E8": 1, + "0xA5CEf1fA8Dd68B4A185CD38903B0CDfFA343182a": 1, + "0xA5De493e5FC052EB73126c793d73888a72Ba7BeC": 1, + "0xB53B799b1bF6B2cAd3fe831FE54fEC5fF0E13fcC": 1, + "0xC28d7EF7781A76ba9486E0e1C143CacF2A88d2C5": 1, + "0xD613d94dd0aE5d761eDc0f27c8d01F52439B4036": 1, + "0xaf9Db30c926d4e48c1314452Ed7C83b05B4a071b": 1, + "0xb4B6C15Dd02deE2a6b9532FD9517bF6e5CA8f313": 1, + "0xb6F4896c742DdC850AE9f0070B66F1F4608fe397": 1, + "0xd2D502b3e57cA8D2E99B807FbE1930f9eE6F44d0": 1, + "0xe58DA790EF3BA3FfbD032b3f7f13101De142C68e": 1 + }, + "asks_updated": "2024-05-11T00:54:42.711586793Z", + "gpus": { "": 0 }, + "qps": 13.666666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 21538.133333333335, + "throughput_out": 1850.3333333333333, + "error_rate": 0.06666666666666667, + "retry_rate": 0.13333333333333333, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.048909090909090264, + "qps": 13.666666666666666, + "throughput_in": 21538.133333333335, + "throughput_out": 1850.3333333333333, + "error_rate": 0.06666666666666667, + "retry_rate": 0.13333333333333333 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6573540efe9a5c79871809ce", + "name": "Meta-Llama/Llama-Guard-7b", + "display_name": "Llama Guard (7B)", + "display_type": "moderation", + 
"description": "Llama Guard: LLM-based Input-Output Safeguard for Human-AI Conversations", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/LlamaGuard-7b", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "chat_template": "{{ '[INST] Task: Check if there is unsafe content in \\'' }}{% if messages[messages.length-1]['role'] == 'user' %}{{ 'User' }}{% else %}{{ 'Agent' }}{% endif %}{{ '\\' messages in conversations according our safety policy with the below categories.\n\n\nO1: Violence and Hate. \nShould not\n - Help people plan or engage in violence.\n - Encourage people to engage in violence.\n - Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.\n - Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\nCan\n - Provide information on violence and discrimination.\n - Discuss the topics of hate and violence, and can discuss historical events involving violence.\nO2: Sexual Content. \nShould not\n - Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.\nCan\n - Discuss topics related to sexuality and sexual education.\n - Provide general information regarding sexual activities.\n - Provide guidance on topics related to sexual health.\nO3: Criminal Planning. \nShould not\n - Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n - Encourage people to engage in criminal activities or condone criminal behavior.\nCan\n - Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n - Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO4: Guns and Illegal Weapons. \nShould not\n - Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.\n - Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.\nCan\n - Discuss firearms and the arguments for and against firearm ownership.\nO5: Regulated or Controlled Substances. \nShould not\n - Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.\n - Assist or encourage people to create such substances when it is illegal to do so.\nCan\n - Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are).\n - Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation.\nO6: Self-Harm. 
\nShould not\n - Encourage people to harm themselves.\n - Romanticize or condone self-harm or suicide.\n - Provide information regarding the methods of suicide or self-harm.\n - Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.\nCan\n - Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions).\nShould\n - Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves.\n\n\n\n\n' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% else %}{{ 'Agent: ' + message['content'] + '\n\n' }}{% endif %}{% endfor %}{{ '\n\nProvide your safety assessment for '}}{% if messages[messages.length-1]['role'] == 'user' %}{{ 'User' }}{% else %}{{ 'Agent' }}{% endif %}{{' in the above conversation:\n - First line must read \\'safe\\' or \\'unsafe\\'.\n - If unsafe, a second line must include a comma-separated list of violated categories. [/INST]' }}", + "safety_label": "safe", + "safe_response": true, + "safety_config": { "max_tokens": 64 }, + "safety_categories": { + "O1": "Violence and Hate", + "O2": "Sexual Content", + "O3": "Criminal Planning", + "O4": "Guns and Illegal Weapons", + "O5": "Regulated or Controlled Substances", + "O6": "Self-Harm" + } + }, + "pricing": { "input": 6, "output": 6, "hourly": 0 }, + "update_at": "2024-04-20T23:25:17.775Z", + "instances": [ + { "avzone": "us-central-5a", "cluster": "wrigleycub" }, + { "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }, + { "avzone": "us-east-2a", "cluster": "jumpyjackal" } + ], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x4Af456F8E15A15082e24E434Ad794ad9387C7169": 1, + "0x4ceB37C5700106874aA40B8DA6b7349Ab7627643": 1, + "0x7Cfb4b7470B07154eA0802dAC8f626b0F5b89faE": 1, + "0xE3bc0e43e4d3Ff1C6942C6134CfB7496A273eCdA": 1 + }, + "asks_updated": "2024-05-11T11:46:46.414181302Z", + "gpus": { "": 0 }, + "qps": 23.066666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 27473.200000000004, + "throughput_out": 52.53333333333333, + "retry_rate": 1, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.10809523809523812, + "qps": 8.466666666666667, + "throughput_in": 10082, + "throughput_out": 18.933333333333334, + "error_rate": 0, + "retry_rate": 0.26666666666666666 + }, + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.13665644171779157, + "qps": 7.466666666666667, + "throughput_in": 9073.333333333334, + "throughput_out": 17.533333333333335, + "error_rate": 0, + "retry_rate": 0.4 + }, + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.24161735700197307, + "qps": 7.133333333333334, + "throughput_in": 8317.866666666667, + "throughput_out": 16.066666666666666, + "error_rate": 0, + "retry_rate": 0.3333333333333333 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "656f5aac044c74c554a30c4f", + "name": "Nexusflow/NexusRaven-V2-13B", + "display_name": "NexusRaven (13B)", + "display_type": "language", + "description": "NexusRaven is an open-source and commercially viable function calling LLM that surpasses the state-of-the-art in function calling 
capabilities.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/Nexusflow/NexusRaven-V2-13B", + "creator_organization": "Nexusflow", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-12-05T17:15:24.561Z", + "update_at": "2023-12-05T17:15:24.561Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x60e899d1504136B312ebac78CCeCA47Dd62Bd267": 1, + "0x66D3F099533df45Dc154e9D10b95B1bcF1f08a03": 1, + "0x932Becec6BD385C4607889D7Ed159212A0e732F2": 1, + "0xC0251a8dB9B86a149E38c88F46912EdA9Df9f346": 1, + "0xE55822B5482FeE8B805Ad51F47f973270c8AEDe5": 1, + "0xFd1bFB3A51138c37C6f8F57D4F7AA2f2911d8CAf": 1 + }, + "asks_updated": "2024-05-10T17:13:11.525066416Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 1, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65664e4d79fe5514beebd5d3", + "name": "NousResearch/Nous-Capybara-7B-V1p9", + "display_name": "Nous Capybara v1.9 (7B)", + "display_type": "chat", + "description": "first Nous collection of dataset and models made by fine-tuning mostly on data created by Nous in-house", + "license": "MIT", + "creator_organization": "NousResearch", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 7241732096, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "add_generation_prompt": true, + "stop": ["USER:", "ASSISTANT:"], + "prompt_format": "USER:\n{prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %} {{ 'USER:\n' + message['content'] + '\n' }}{% elif message['role'] == 'system' %}{{ 'SYSTEM:\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT:\n' + message['content'] + '\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:\n' }}{% endif %}{% endfor %}" + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-11-28T20:32:13.026Z", + "update_at": "2023-11-28T20:33:03.163Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x88eB978d91199D40cB23871d4319d382EF40492D": 1, + "0xa6C19366D1A480921d66ec924B3513DB8F77781d": 1 + }, + "asks_updated": "2024-05-11T02:43:01.448420782Z", + "gpus": { "": 0 }, + "qps": 0.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 898.0666666666667, + "throughput_out": 36.2, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.35555555555555546, + "qps": 0.6, + "throughput_in": 898.0666666666667, + "throughput_out": 36.2, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": 
"65d542a20af4aafc88716626", + "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "display_name": "Nous Hermes 2 - Mistral DPO (7B)", + "display_type": "chat", + "description": "Nous Hermes 2 on Mistral 7B DPO is the new flagship 7B Hermes! This model was DPO'd from Teknium/OpenHermes-2.5-Mistral-7B and has improved across the board on all benchmarks tested - AGIEval, BigBench Reasoning, GPT4All, and TruthfulQA.", + "license": "apache-2.0", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "creator_organization": "NousResearch", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>"], + "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-21T00:24:02.387Z", + "update_at": "2024-02-21T00:24:02.387Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xEFa73cF1A2DD2Be31888913c57bf569cA27ce9E6": 1 }, + "asks_updated": "2024-05-11T05:55:30.322194054Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 366.2, + "throughput_out": 20.266666666666666, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.07326007326007326, + "qps": 0.13333333333333333, + "throughput_in": 366.2, + "throughput_out": 20.266666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a4b298fbc8405400423169", + "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "display_name": "Nous Hermes 2 - Mixtral 8x7B-DPO ", + "display_type": "chat", + "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. 
The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "creator_organization": "NousResearch", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2024-01-15T04:20:40.079Z", + "update_at": "2024-04-12T18:35:56.478Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [ + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5a", "cluster": "wrigleycub" } + ], + "isFinetuned": false, + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x17B96a27Dd71A9C4687441c14d1feCA207D0D3d4": 1, + "0x1812939B682B119d362412811237da09D9bc6c8D": 1, + "0xde2F311932B19E8Aa2069302FA701f6d0fA1B574": 1 + }, + "asks_updated": "2024-05-11T00:30:10.175648127Z", + "gpus": { "": 0 }, + "qps": 0.9333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 887.2, + "throughput_out": 13.866666666666667, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.03333333333333333, + "qps": 0.2, + "throughput_in": 301.06666666666666, + "throughput_out": 3.7333333333333334, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.07142857142857142, + "qps": 0.2, + "throughput_in": 173.66666666666666, + "throughput_out": 2.4, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.08333333333333333, + "qps": 0.5333333333333333, + "throughput_in": 412.46666666666664, + "throughput_out": 7.733333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a4466efbc8405400423166", + "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "display_name": "Nous Hermes 2 - Mixtral 8x7B-SFT", + "display_type": "chat", + "description": "Nous Hermes 2 Mixtral 7bx8 SFT is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. 
The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "creator_organization": "NousResearch", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2024-01-14T20:39:10.060Z", + "update_at": "2024-01-14T20:39:10.060Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isFinetuned": false, + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3805a418c9af7eA4a88C6BC519ba95223EFe87F7": 1 }, + "asks_updated": "2024-05-10T17:07:56.753575198Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "658c8dad27fb98d2edc447ff", + "name": "NousResearch/Nous-Hermes-2-Yi-34B", + "display_name": "Nous Hermes-2 Yi (34B)", + "display_type": "chat", + "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune", + "license": "apache-2", + "creator_organization": "NousResearch", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 34000000000, + "release_date": "2023-12-27T20:48:45.586Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": ["<|im_start|>", "<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 200, "output": 200 }, + "created_at": "2023-12-27T20:48:45.586Z", + "update_at": "2023-12-27T20:50:38.632Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1f58b29024eba2f33b3983733396b4eda0E6f976": 1 }, + "asks_updated": "2024-05-11T11:46:22.377796052Z", + "gpus": { "": 0 }, + "qps": 18.266666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3213.866666666667, + "throughput_out": 438.8, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.45881427809138686, + "qps": 18.266666666666666, + "throughput_in": 3213.866666666667, + "throughput_out": 438.8, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64cae18d3ede2fa7e2cbcc7d", + "name": "NousResearch/Nous-Hermes-Llama2-13b", + "display_name": "Nous Hermes Llama-2 
(13B)", + "display_type": "chat", + "description": "Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "license": "mit", + "creator_organization": "NousResearch", + "hardware_label": "2x A100 80GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "stop": ["###", ""], + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-08-02T23:06:53.926Z", + "update_at": "2023-10-07T00:19:33.779Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xfA6b8e3C0ac21BA89F8e75770251f0E4e509eF90": 1 }, + "asks_updated": "2024-05-10T17:59:32.616570629Z", + "gpus": { "": 0 }, + "qps": 1, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1430.2, + "throughput_out": 166.06666666666666, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0.336864406779661, + "qps": 1, + "throughput_in": 1430.2, + "throughput_out": 166.06666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf6", + "name": "NousResearch/Nous-Hermes-llama-2-7b", + "display_name": "Nous Hermes LLaMA-2 (7B)", + "display_type": "chat", + "description": "Nous-Hermes-Llama2-7b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-llama-2-7b", + "creator_organization": "NousResearch", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 6738415616, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "stop": ["###", ""], + "add_generation_prompt": true, + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.403Z", + "update_at": "2023-10-24T17:41:52.365Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xf3AbD7152646995C204D8Bee0699AC58653De524": 1 }, + "asks_updated": "2024-05-10T16:28:20.007677485Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.06666666666666667, + "qps": 0, + "throughput_in": 0, + 
"throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf5", + "name": "Open-Orca/Mistral-7B-OpenOrca", + "display_name": "OpenOrca Mistral (7B) 8K", + "display_type": "chat", + "description": "An OpenOrca dataset fine-tune on top of Mistral 7B by the OpenOrca team.", + "license": "apache-2.0", + "link": "https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca", + "creator_organization": "OpenOrca", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 7241748480, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.403Z", + "update_at": "2023-10-24T00:01:52.541Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x802be1ae9dC8F68c43a47ec3d2070F8f1B0553E8": 1 }, + "asks_updated": "2024-05-11T11:46:47.152201508Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.1111111111111111, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5cb", + "name": "Phind/Phind-CodeLlama-34B-v2", + "display_name": "Phind Code LLaMA v2 (34B)", + "display_type": "code", + "description": "Phind-CodeLlama-34B-v1 trained on additional 1.5B tokens high-quality programming-related data proficient in Python, C/C++, TypeScript, Java, and more.", + "license": "llama2", + "creator_organization": "Phind", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 33743970304, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "### System Prompt\nYou are an intelligent programming assistant.\n\n### User Message\n{prompt}n\n### Assistant\n", + "stop": [""], + "chat_template": "{{ '### System Prompt\nYou are an intelligent programming assistant.\n\n' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '### User Message\n' + message['content'] + '\n' }}{% else %}{{ '### Assistant\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant\n' }}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "testytiger" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xE3b9434A627d4E042a82A4E04375E7B14D9a2866": 1 }, + "asks_updated": "2024-05-10T13:54:50.844650373Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "testytiger", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + 
"error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c81b4975e79f24d98b50", + "name": "Qwen/Qwen1.5-0.5B-Chat", + "display_name": "Qwen 1.5 Chat (0.5B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 500000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:35:55.571Z", + "update_at": "2024-02-05T11:35:55.571Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x69d786B0E491C02c3053287F7FD4aa684A0f86B9": 1 }, + "asks_updated": "2024-05-10T14:34:01.502238784Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8164975e79f24d98b4f", + "name": "Qwen/Qwen1.5-0.5B", + "display_name": "Qwen 1.5 (0.5B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 500000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:35:50.032Z", + "update_at": "2024-02-05T11:35:50.032Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xa01d67F2450E0e7ACBfb7dc8B1a0A3205C5C8310": 1 }, + "asks_updated": "2024-05-11T00:20:07.81838798Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8284975e79f24d98b52", + "name": "Qwen/Qwen1.5-1.8B-Chat", + "display_name": "Qwen 1.5 Chat (1.8B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 1800000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:08.609Z", + "update_at": "2024-02-05T11:36:08.609Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x332b426661a850784BAcFd12B9E7D9b51397B1ec": 1 }, + "asks_updated": "2024-05-10T19:50:02.900326326Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.16666666666666666, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8214975e79f24d98b51", + "name": 
"Qwen/Qwen1.5-1.8B", + "display_name": "Qwen 1.5 (1.8B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-1.8B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 1800000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:01.895Z", + "update_at": "2024-02-05T11:36:01.895Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xE1E3e79fC7e677c1Bdb8E6f6B6dde0B5d78C2ABc": 1 }, + "asks_updated": "2024-05-10T13:22:12.143866414Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.16666666666666666, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "663929111a16009453d858d6", + "name": "Qwen/Qwen1.5-110B-Chat", + "display_name": "Qwen 1.5 Chat (110B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-110B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 110000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "owner_userid": null, + "config": { + "stop": ["<|im_end|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 450, "output": 450, "hourly": 0 }, + "created_at": "2024-05-06T19:01:37.206Z", + "update_at": "2024-05-06T19:01:37.206Z", + "instances": [{ "avzone": "us-south-1a", "cluster": "mustymarfa" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1bfE8838c1A5fA63cc1120e2de1Bce2599FDd946": 1 }, + "asks_updated": "2024-05-11T09:12:31.886283279Z", + "gpus": { "": 0 }, + "qps": 0.26666666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 143.4, + "throughput_out": 42.6, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.0476310802274163, + "qps": 0.26666666666666666, + "throughput_in": 143.4, + "throughput_out": 42.6, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c84d4975e79f24d98b58", + "name": "Qwen/Qwen1.5-14B-Chat", + "display_name": "Qwen 1.5 Chat (14B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-14B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 14000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2024-02-05T11:36:45.529Z", + "update_at": "2024-02-05T11:36:45.529Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x13E78CCaCAc01069EF5a5505aa288eC3bb835eF3": 1 }, + "asks_updated": "2024-05-10T18:51:22.462254434Z", + "gpus": { "": 0 }, + "qps": 0.4, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 193.06666666666666, + "throughput_out": 136.2, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.4341556171423196, + "qps": 0.4, + "throughput_in": 193.06666666666666, + "throughput_out": 136.2, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8474975e79f24d98b57", + "name": "Qwen/Qwen1.5-14B", + "display_name": "Qwen 1.5 (14B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-14B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 14000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2024-02-05T11:36:39.431Z", + "update_at": "2024-02-05T11:36:39.431Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x473F3790526C64D89f0d1598C022bE36492D3051": 1 }, + "asks_updated": "2024-05-10T18:51:30.246170129Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "660c48d16184ee782ae490f0", + "name": "Qwen/Qwen1.5-32B-Chat", + "display_name": "Qwen 1.5 Chat (32B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 32000000000, + "show_in_playground": "true", + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2024-04-02T17:23:42.826Z", + "update_at": "2024-04-05T15:40:08.892Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xA47D7a9012B0e335809310AAc55497D50a855a3F": 1 }, + "asks_updated": "2024-05-11T05:55:35.551622457Z", + "gpus": { "": 0 }, + "qps": 0.26666666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 64.66666666666667, + "throughput_out": 124.26666666666667, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.09569027819707074, + "qps": 0.26666666666666666, + "throughput_in": 64.66666666666667, + "throughput_out": 124.26666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": 
{ "appearsIn": [], "order": 0 }, + "_id": "660c40783cd92bc225de4b41", + "name": "Qwen/Qwen1.5-32B", + "display_name": "Qwen 1.5 (32B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 32000000000, + "show_in_playground": "true", + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2024-04-02T17:23:42.826Z", + "update_at": "2024-04-05T15:40:15.875Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xc0a1c6F29F6a40fAC5fedd7Bb1723c7bf566785A": 1 }, + "asks_updated": "2024-05-10T19:27:02.10899998Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8344975e79f24d98b54", + "name": "Qwen/Qwen1.5-4B-Chat", + "display_name": "Qwen 1.5 Chat (4B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-4B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 4000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:20.314Z", + "update_at": "2024-02-05T11:36:20.314Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x32F674C484700968dEC9fe5D93C995179FAD2EE3": 1, + "0x56cab5C68705D192eA47A8Cf114c3904eC75c52E": 1, + "0x83783b52657B34c3e0C2938296009d398954dB26": 1, + "0xe34ba24c85fADb5E7fB2dBA0f292C9d25fF2B499": 1 + }, + "asks_updated": "2024-05-11T12:37:55.98626009Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c82e4975e79f24d98b53", + "name": "Qwen/Qwen1.5-4B", + "display_name": "Qwen 1.5 (4B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-4B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 4000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:14.800Z", + "update_at": "2024-02-05T11:36:14.800Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x2cf9F631373B30D4E27961Ac0D58799Fa32D30dc": 1 }, + "asks_updated": "2024-05-10T16:47:58.648213115Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c85a4975e79f24d98b5a", + "name": "Qwen/Qwen1.5-72B-Chat", + "display_name": "Qwen 1.5 Chat (72B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-72B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 72000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-02-05T11:36:58.193Z", + "update_at": "2024-04-17T19:23:06.511Z", + "instances": [ + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-south-1a", "cluster": "mustymarfa" } + ], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x9b2ef3e00dba4a0949B037095AA8F4FC97aB76Ea": 1, + "0xCE288A4aAf0EBc35C602441F03F09139993994A6": 1, + "0xcC4AB060c2cbe72ad1466eedE837Fb3Ca7015120": 1 + }, + "asks_updated": "2024-05-11T12:20:13.616737256Z", + "gpus": { "": 0 }, + "qps": 0.4666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 770.0666666666667, + "throughput_out": 126.4, + "stats": [ + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 
0.043187871337840605, + "qps": 0.4, + "throughput_in": 732, + "throughput_out": 124.4, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0, + "qps": 0.06666666666666667, + "throughput_in": 38.06666666666667, + "throughput_out": 2, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8544975e79f24d98b59", + "name": "Qwen/Qwen1.5-72B", + "display_name": "Qwen 1.5 (72B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-72B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 72000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": {}, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-02-05T11:36:52.008Z", + "update_at": "2024-02-05T11:36:52.008Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x37A5f0f9744F5bC79Da7908E1b70C10502C4b4cf": 1 }, + "asks_updated": "2024-05-10T18:50:03.489164666Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.3333333333333333, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8404975e79f24d98b56", + "name": "Qwen/Qwen1.5-7B-Chat", + "display_name": "Qwen 1.5 Chat (7B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-05T11:36:32.804Z", + "update_at": "2024-02-05T11:36:32.804Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1D0455b2E77572f9584b859f1463114BD4D4EFDE": 1 }, + "asks_updated": "2024-05-11T01:45:17.557563997Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c83a4975e79f24d98b55", + "name": "Qwen/Qwen1.5-7B", + "display_name": "Qwen 1.5 (7B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-7B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-05T11:36:26.420Z", + "update_at": "2024-02-05T11:36:26.420Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x2ccdcdEf417d5d6D2EeD95dF48f1fcc8Ec1085b2": 1 }, + "asks_updated": "2024-05-11T05:55:32.170734058Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.1, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acee11227f790586239d36", + "name": "SG161222/Realistic_Vision_V3.0_VAE", + "display_name": "Realistic Vision 3.0", + "display_type": "image", + "description": "Fine-tune version of Stable Diffusion focused on photorealism.", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/SG161222/Realistic_Vision_V1.4", + "creator_organization": "SG161222", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { "height": 1024, "width": 1024, "steps": 20, "number_of_images": 2, "seed": 42 }, + "created_at": "2023-07-11T05:52:17.219Z", + "update_at": "2023-07-11T05:52:17.219Z", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1E128f472069E38aEF6B8f25147B42EF81f0F3C0": 1 }, + "asks_updated": "2024-05-11T02:40:41.799352496Z", + "gpus": { "NVIDIA A40": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.0429948, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3.8357315 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "662985e66d314668baa595f8", + "name": "Snowflake/snowflake-arctic-instruct", + "display_name": "Snowflake Arctic Instruct", + "display_type": "chat", + "description": "Arctic is a dense-MoE Hybrid transformer architecture pre-trained from scratch by the Snowflake AI Research Team.", + "license": "Apache-2.0", + "link": "https://huggingface.co/Snowflake/snowflake-arctic-instruct", + "creator_organization": "Snowflake", + "hardware_label": "8X H100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "480000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "chat_template_name": "default", + "stop": ["<|im_start|>", "<|im_end|>"] + }, + "pricing": { "input": 600, "output": 600, "hourly": 0 }, + "update_at": "2024-05-07T05:05:41.946Z", + "instances": [{ "avzone": "us-central-6a", "cluster": 
"mirthfulproxy2" }], + "engine": "vllm", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x7F8D3B29224f2a7f2c88118B67815AdCf3E2228d": 1 }, + "asks_updated": "2024-05-10T14:43:17.112345066Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0.86, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "655d15e7b56cf1e0970c9b17", + "name": "Undi95/ReMM-SLERP-L2-13B", + "display_name": "ReMM SLERP L2 (13B)", + "display_type": "chat", + "description": "Re:MythoMax (ReMM) is a recreation trial of the original MythoMax-L2-B13 with updated models. This merge use SLERP [TESTING] to merge ReML and Huginn v1.2.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/Undi95/ReMM-SLERP-L2-13B", + "creator_organization": "Undi95", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST]\n {prompt} \n[/INST]\n\n", + "stop": ["[INST]", "\n\n"], + "chat_template_name": "llama", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-11-21T20:41:11.759Z", + "update_at": "2023-11-21T20:41:11.759Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x07c96Eeb1Bb52ae6FB40543f6188912775F35d52": 1 }, + "asks_updated": "2024-05-10T17:32:28.22917725Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 62.46666666666667, + "throughput_out": 0.6666666666666666, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.13333333333333333, + "qps": 0.06666666666666667, + "throughput_in": 62.46666666666667, + "throughput_out": 0.6666666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "655d0fecb56cf1e0970c9b16", + "name": "Undi95/Toppy-M-7B", + "display_name": "Toppy M (7B)", + "display_type": "chat", + "description": "A merge of models built by Undi95 with the new task_arithmetic merge method from mergekit.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/Undi95/Toppy-M-7B", + "creator_organization": "Undi95", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 7241748480, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["###"], + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n\n' }}{% endif %}{% endfor %}{{ '### Response:' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-11-21T20:15:40.468Z", + "update_at": "2023-11-21T20:15:40.468Z", + "instances": [{ 
"avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x80bd2D4302331454187F9EdA8b88e99d6E4A6c9b": 1 }, + "asks_updated": "2024-05-11T07:32:00.722382147Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 138.46666666666667, + "throughput_out": 8.133333333333333, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.1111111111111111, + "qps": 0.13333333333333333, + "throughput_in": 138.46666666666667, + "throughput_out": 8.133333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "658504fde7e2e898e81b5400", + "name": "WhereIsAI/UAE-Large-V1", + "display_name": "UAE-Large-V1", + "display_type": "embedding", + "description": "A universal English sentence embedding WhereIsAI/UAE-Large-V1 achieves SOTA on the MTEB Leaderboard with an average score of 64.64!", + "license": "apache-2.0", + "link": "https://huggingface.co/bert-base-uncased", + "creator_organization": "WhereIsAI", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 330000000, + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 4, "output": 4, "finetune": 0, "base": 0 }, + "created_at": "2023-12-22T03:39:41.105Z", + "update_at": "2023-12-22T03:45:34.219Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isFinetuned": false, + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x97E9EAE94B8498A57f4F9033A32d722323C294C8": 1, + "0xb8Bfb7F25770CfF8bf88ddF1D29237f1D5604d96": 1 + }, + "asks_updated": "2024-05-11T03:02:55.096371076Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5cd", + "name": "WizardLM/WizardCoder-15B-V1.0", + "display_name": "WizardCoder v1.0 (15B)", + "display_type": "code", + "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.", + "license": "llama2", + "creator_organization": "WizardLM", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 15517462528, + "show_in_playground": true, + "context_length": 8192, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:\n", + "stop": ["###", "<|endoftext|>"], + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 
0, + "num_running": 0, + "asks": { + "0x4C51aff4170724263bF75af64CE9E2e8F6079fA9": 1, + "0xb4CdE622719696fd930e92FB5bBfC3eA3176D2Fd": 1 + }, + "asks_updated": "2024-05-11T02:06:56.287724569Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f672e8bc372ce719b97f02", + "name": "WizardLM/WizardCoder-Python-34B-V1.0", + "display_name": "WizardCoder Python v1.0 (34B)", + "display_type": "code", + "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.", + "license": "llama2", + "creator_organization": "WizardLM", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["", "###"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-09-05T00:14:32.365Z", + "update_at": "2023-09-05T00:14:32.365Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xAC3abeabCb3Ef089becEA8b551a4e998AD8dDF30": 1 }, + "asks_updated": "2024-05-11T09:54:27.918691661Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6567d4e5d1c5e59967640530", + "name": "WizardLM/WizardLM-13B-V1.2", + "display_name": "WizardLM v1.2 (13B)", + "display_type": "chat", + "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities", + "license": "llama2", + "creator_organization": "WizardLM", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 13000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": ["", "USER:", "ASSISTANT:"], + "prompt_format": "USER: {prompt} ASSISTANT:", + "add_generation_prompt": true, + "chat_template_name": "llama", + "pre_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. 
" + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-11-30T00:18:45.791Z", + "update_at": "2023-11-30T01:20:01.779Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xF9d994b8D62c40bA7532917955dc49D4712C6Ec0": 1 }, + "asks_updated": "2024-05-10T14:31:00.559469906Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.09090909090909091, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65df9fa4d28dc68bcefec054", + "name": "allenai/OLMo-7B-Instruct", + "display_name": "OLMo Instruct (7B)", + "display_type": "chat", + "description": "The OLMo models are trained on the Dolma dataset", + "license": "apache-2.0", + "link": "https://huggingface.co/allenai/OLMo-7B-Instruct", + "creator_organization": "AllenAI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "eos_token": "<|endoftext|>", + "prompt_format": "<|user|>\n{prompt}\n<|assistant|>", + "stop": ["<|endoftext|>"], + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-28T21:03:32.038Z", + "update_at": "2024-02-28T21:03:32.038Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xD29D5B02918F962505749Ace7d67AB3E2acAbc67": 1 }, + "asks_updated": "2024-05-11T02:57:58.795395564Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.0625, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65dfa682d28dc68bcefec055", + "name": "allenai/OLMo-7B-Twin-2T", + "display_name": "OLMo Twin-2T (7B)", + "display_type": "language", + "description": "The OLMo models are trained on the Dolma dataset", + "license": "apache-2.0", + "link": "https://huggingface.co/allenai/OLMo-7B-Twin-2T", + "creator_organization": "AllenAI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-28T21:32:50.812Z", + "update_at": "2024-02-28T21:32:50.812Z", + "instances": 
[{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3f2D5E8E2C72C0A63A478da9774d8C2F1F4E5c55": 1 }, + "asks_updated": "2024-05-11T03:07:59.684414475Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65dfa6ebd28dc68bcefec056", + "name": "allenai/OLMo-7B", + "display_name": "OLMo (7B)", + "display_type": "language", + "description": "The OLMo models are trained on the Dolma dataset", + "license": "apache-2.0", + "link": "https://huggingface.co/allenai/OLMo-7B", + "creator_organization": "AllenAI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-28T21:34:35.444Z", + "update_at": "2024-02-28T21:34:35.444Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xfC0C60D66A62b2A87f96B3318500e876F1B1e367": 1 }, + "asks_updated": "2024-05-11T02:56:32.514653629Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6598bc0201bf780326e7eac8", + "name": "bert-base-uncased", + "display_name": "Bert Base Uncased", + "display_type": "embedding", + "description": "original BERT model", + "license": "Apache-2", + "creator_organization": "Google", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 46550608, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2024-01-06T02:33:38.323Z", + "update_at": "2024-01-06T02:33:38.323Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x0b7eae8cCeb3D67b02A97ac2D1100E29E6991EB9": 1, + "0x21558AA2fCc15eF003135a4108a0884d4A3054f2": 1, + "0x2fb2cf26D55c96dc0BAad5f088b0e5Bf0FDe565B": 1, + "0x5857eaB3609A074E402972C3DDDE8957ea4E7dC5": 1, + "0xB49Bf891cBeba9F3e5045acbD9CD7C3fD932A543": 1, + "0xC412E22A5B1CE26b65B80f2217b9419369057714": 1 + }, + "asks_updated": "2024-05-11T03:13:31.258661006Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, 
"input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425b", + "name": "codellama/CodeLlama-13b-Instruct-hf", + "display_name": "Code Llama Instruct (13B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "add_generation_prompt": true, + "stop": ["", "[INST]"], + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-04T05:01:42.539Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x085bF8877517A750f62641F8FE4C5a2D6b26e899": 1, + "0x934A45b707cbe77453d7d14F4d84F31CaF8adc6F": 1, + "0xA6c2278710AC89440e150857521e67572D52f303": 1 + }, + "asks_updated": "2024-05-11T05:55:30.372800468Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425a", + "name": "codellama/CodeLlama-13b-Python-hf", + "display_name": "Code Llama Python (13B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-20T22:52:59.177Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x1F6868Df357950c3F6E5804a60d146A883f1fC7b": 1, + "0x6b3EbBfa6c3DFDa17dD19c35557A9F3bAdD55583": 1, + "0x8eb3F32C3999eaD4867f54ABE0098a0bFE9e2f23": 1, + "0xA405565bdBf98e1aFd8CcBEdc028F0546c41eB47": 1, + "0xBd7eC5bF0b33b56c916A4b2deB99A37025837d9a": 1, + "0xc710087956F114639A3726cb6d4302B125822574": 1 + }, + "asks_updated": "2024-05-11T12:35:27.588482466Z", + "gpus": { "": 0 }, + "qps": 0.4, + 
"permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 201.46666666666667, + "throughput_out": 142.73333333333332, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0.4, + "throughput_in": 201.46666666666667, + "throughput_out": 142.73333333333332, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa144261", + "name": "codellama/CodeLlama-34b-Instruct-hf", + "display_name": "Code Llama Instruct (34B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "add_generation_prompt": true, + "stop": ["", "[INST]"], + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}" + }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xF5546B0d0414AFfc8ee2Dc36D61EcAF3a2ec65F5": 1 }, + "asks_updated": "2024-05-10T17:12:16.470434811Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 93.6, + "throughput_out": 14.8, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.058823529411764705, + "qps": 0.06666666666666667, + "throughput_in": 93.6, + "throughput_out": 14.8, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa144260", + "name": "codellama/CodeLlama-34b-Python-hf", + "display_name": "Code Llama Python (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xe09fF3EE0889C8F5c9e434E8AF523649805E34e1": 1 }, + "asks_updated": 
"2024-05-10T17:08:19.24073671Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b6f505752a299002ee4dc9", + "name": "codellama/CodeLlama-70b-Instruct-hf", + "display_name": "Code Llama Instruct (70B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/codellama/CodeLlama-70b-Instruct-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "70000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "chat_template": "{{ bos_token + ' ' }}{% for message in messages %}{{'Source: ' + message['role'].trim() }}{% if not message['destination'] is 'undefined' %}{{ '\n' + 'Destination: ' + message['destination'].trim() }}{% elif message['role'] == 'system' %}{{ '\n' + 'Destination: assistant' }}{% elif message['role'] == 'user' %}{{ '\n' + 'Destination: assistant' }}{% elif message['role'] == 'assistant' %}{{ '\n' + 'Destination: user' }}{% endif %}{{ '\n\n ' + message['content'].trim() + '' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ 'Source: assistant' + '\n' }}{{ 'Destination: user' + '\n\n' + ' ' }}{% endif %}", + "bos_token": "", + "step_id": "", + "stop": [""], + "add_generation_prompt": true + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-01-29T00:44:53.513Z", + "update_at": "2024-01-29T00:44:53.513Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xa2663C264Db2177E3Ae3Ea643152B2b9b1f1dA6c": 1 }, + "asks_updated": "2024-05-11T05:55:32.068494589Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.010869565217391304, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b6f4ba752a299002ee4dc7", + "name": "codellama/CodeLlama-70b-Python-hf", + "display_name": "Code Llama Python (70B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/codellama/CodeLlama-70b-Python-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "70000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-01-29T00:43:38.396Z", + 
"update_at": "2024-01-29T00:43:38.396Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xf2a7de1a0E1dC83DC5B1f1dE8783dFEc67be8910": 1 }, + "asks_updated": "2024-05-10T18:49:47.188860922Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b6f4d4752a299002ee4dc8", + "name": "codellama/CodeLlama-70b-hf", + "display_name": "Code Llama (70B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/codellama/CodeLlama-70b-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "70000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""] }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-01-29T00:44:04.149Z", + "update_at": "2024-01-29T00:44:04.149Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 5, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x39E9cFF7e3169839d6D0Cd8262E618c2AaaA3625": 1, + "0x4e5281dB0A47701fb2Bb5669175C2A51d5e06496": 1, + "0x8310bA7683A57Ce0d2A431FC53b74FCc1bF2cF94": 1, + "0xd3C0F1d52BeBdB36d105AFB149b16fbb41a30810": 1, + "0xe8eC4B8AA74A5Fc2FC3A961F97fE6dE030f979b1": 1 + }, + "asks_updated": "2024-05-11T05:55:39.590729933Z", + "gpus": { "": 0 }, + "qps": 0.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 621.7333333333333, + "throughput_out": 48.666666666666664, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 1.21875, + "qps": 0.6, + "throughput_in": 621.7333333333333, + "throughput_out": 48.666666666666664, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425e", + "name": "codellama/CodeLlama-7b-Instruct-hf", + "display_name": "Code Llama Instruct (7B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": 
"2023-08-24T17:08:25.379Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x6Af33E97914777b841B6f7a11332bF142301403F": 1 }, + "asks_updated": "2024-05-11T11:22:28.25491246Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.25, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425d", + "name": "codellama/CodeLlama-7b-Python-hf", + "display_name": "Code Llama Python (7B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x68494584Fc039C3698b8E02eDe63d45DE596DE17": 1, + "0xcd079137d79089474d4d876109cECE8bb33E7Ef9": 1 + }, + "asks_updated": "2024-05-11T12:05:29.893871627Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "660899dc98fbb3f86d028781", + "name": "cognitivecomputations/dolphin-2.5-mixtral-8x7b", + "display_name": "Dolphin 2.5 Mixtral 8x7b", + "display_type": "chat", + "description": "This Dolphin is really good at coding, I trained with a lot of coding data. 
It is very obedient but it is not DPO tuned - so you still might need to encourage it in the system prompt as I show in the below examples.", + "license": "apache-2.0", + "link": "https://huggingface.co/cognitivecomputations/dolphin-2.5-mixtral-8x7b", + "creator_organization": "cognitivecomputations", + "hardware_label": "8x H100 80GB SXM", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 56000000000, + "release_date": "2023-08-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "pre_prompt": "<|im_start|>system\nYou are Dolphin, an uncensored and unbiased AI assistant.<|im_end|>\n", + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2023-09-05T19:58:25.683Z", + "update_at": "2023-09-05T19:58:25.683Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isDedicatedInstance": false, + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xC247D4e301f7bB0ecf719577741bF912919332e6": 1 }, + "asks_updated": "2024-05-10T19:00:59.773522401Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 96.2, + "throughput_out": 10.266666666666667, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0.06666666666666667, + "throughput_in": 96.2, + "throughput_out": 10.266666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "661456e0c60f613bee9d2d06", + "name": "databricks/dbrx-instruct", + "display_name": "DBRX Instruct", + "display_type": "chat", + "description": "DBRX Instruct is a mixture-of-experts (MoE) large language model trained from scratch by Databricks. 
DBRX Instruct specializes in few-turn interactions.", + "license": "Databricks Open Model License", + "link": "https://huggingface.co/databricks/dbrx-instruct", + "creator_organization": "Databricks", + "hardware_label": "4X H100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "132000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "add_generation_prompt": true, + "chat_template_name": "default", + "stop": ["<|im_start|>", "<|im_end|>"] + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "instances": [{ "avzone": "us-central-6a", "cluster": "mirthfulproxy2" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xEf707f83DC8C7BA4C1b1D289C3380dF993A3E507": 1 }, + "asks_updated": "2024-05-10T15:35:58.638900703Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c3137e4975e79f24d98b5c", + "name": "deepseek-ai/deepseek-coder-33b-instruct", + "display_name": "Deepseek Coder Instruct (33B)", + "display_type": "chat", + "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.", + "license": "deepseek", + "link": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct", + "creator_organization": "DeepSeek", + "pricing_tier": "Featured", + "num_parameters": 33000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "", + "stop": ["<|EOT|>", "<|begin▁of▁sentence|>", "<|end▁of▁sentence|>"], + "bos_token": "<|begin▁of▁sentence|>", + "add_generation_prompt": true, + "chat_template": "{{'<|begin▁of▁sentence|>'}}{%- for message in messages %}{%- if message['role'] == 'system' %}{{ message['content'] }}{%- else %}{%- if message['role'] == 'user' %}{{'### Instruction:\\n' + message['content'] + '\\n'}}{%- else %}{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}{%- endif %}{%- endif %}{%- endfor %}{% if add_generation_prompt %}{{'### Response:'}}{% endif %}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2024-02-07T05:22:06.809Z", + "update_at": "2024-02-07T05:22:06.809Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x25DEF2A3bBB026031AB7eED0439aC90cb9269E2D": 1, + "0x794F82c9417C98e5B30A14165481686E8e94251f": 1, + "0xA6091E5e79d33269023eC3413e1a4bD94870685C": 1 + }, + "asks_updated": "2024-05-10T21:27:01.421440752Z", + "gpus": { "": 0 }, + "qps": 2.4, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 2281.866666666667, + "throughput_out": 535.0666666666667, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.49519890260630994, + "qps": 2.4, + "throughput_in": 
2281.866666666667, + "throughput_out": 535.0666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "660c58976184ee782ae490f1", + "name": "deepseek-ai/deepseek-llm-67b-chat", + "display_name": "DeepSeek LLM Chat (67B)", + "display_type": "chat", + "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese", + "license": "deepseek", + "link": "https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat", + "creator_organization": "DeepSeek", + "pricing_tier": "", + "num_parameters": 67000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "owner_userid": "", + "config": { + "prompt_format": "", + "stop": ["<|begin▁of▁sentence|>", "<|end▁of▁sentence|>"], + "bos_token": "<|begin▁of▁sentence|>", + "add_generation_prompt": true, + "chat_template": "{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %} {{ 'User: ' + message['content'] + '\n\n'}}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|end▁of▁sentence|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-04-02T19:12:23.328Z", + "update_at": "2024-04-02T19:12:23.328Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x9E9Eb2e3fD4122006fF73bE9Bb0aFF2572549326": 1 }, + "asks_updated": "2024-05-10T14:04:42.701320488Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 40.13333333333333, + "throughput_out": 31.866666666666667, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.125, + "qps": 0.13333333333333333, + "throughput_in": 40.13333333333333, + "throughput_out": 31.866666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f676f7bc372ce719b97f04", + "name": "garage-bAInd/Platypus2-70B-instruct", + "display_name": "Platypus2 Instruct (70B)", + "display_type": "chat", + "description": "An instruction fine-tuned LLaMA-2 (70B) model by merging Platypus2 (70B) by garage-bAInd and LLaMA-2 Instruct v2 (70B) by upstage.", + "license": "CC BY-NC-4.0", + "creator_organization": "garage-bAInd", + "hardware_label": "2x A100 80GB", + "pricing_tier": "featured", + "num_parameters": 70000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": ["", "###"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "add_generation_prompt": true, + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %} {{ '### Instruction:\n' + message['content'] + '\n' }}{% elif message['role'] == 'system' %}{{ '### System:\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% if loop.last %}{{ '### Response:\n' }}{% endif %}{% endfor %}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": 
"2023-09-05T00:31:51.264Z", + "update_at": "2023-09-07T01:46:29.338Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x32fA272f7D81963fc8EE3DCA70E28a00BB5f2617": 1 }, + "asks_updated": "2024-05-10T18:51:18.425217527Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.2333333333333333, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7e89e03b97802d3af0512", + "name": "google/gemma-2b-it", + "display_name": "Gemma Instruct (2B)", + "display_type": "chat", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-2b-it", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 2000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["", ""], + "chat_template": "{{ bos_token }}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{% for message in messages %}{{'' + role + '\n' + message['content'] + '' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>model\n' }}{% endif %}", + "bos_token": "" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-23T00:36:46.381Z", + "update_at": "2024-02-23T00:36:46.381Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x83ff7421906004DEa319FB2Dc5766F86f146973E": 1 }, + "asks_updated": "2024-05-11T03:02:55.608299306Z", + "gpus": { "": 0 }, + "qps": 0.8, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 55.733333333333334, + "throughput_out": 5.533333333333333, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.0078125, + "qps": 0.8, + "throughput_in": 55.733333333333334, + "throughput_out": 5.533333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7e93203b97802d3af0513", + "name": "google/gemma-2b", + "display_name": "Gemma (2B)", + "display_type": "language", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-2b", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 2000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-23T00:39:14.772Z", + "update_at": "2024-02-23T00:39:14.772Z", + "instances": [{ "avzone": 
"us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x3B05b29E71860Ca416cEe96c7e793c36fc4Ce5Ff": 1, + "0x9CFcBB9434f86b6Ce544DB9880af29d188d9433f": 1 + }, + "asks_updated": "2024-05-11T03:12:00.872484732Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7ea3d03b97802d3af0515", + "name": "google/gemma-7b-it", + "display_name": "Gemma Instruct (7B)", + "display_type": "chat", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-7b-it", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["", ""], + "chat_template": "{{ bos_token }}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{% for message in messages %}{{'' + role + '\n' + message['content'] + '' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>model\n' }}{% endif %}", + "bos_token": "" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-23T00:43:41.936Z", + "update_at": "2024-02-23T00:43:41.936Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xa368a540D087220119290B897192743bFE379beE": 1 }, + "asks_updated": "2024-05-10T16:04:09.532225327Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7ea3b03b97802d3af0514", + "name": "google/gemma-7b", + "display_name": "Gemma (7B)", + "display_type": "language", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-7b", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-23T00:43:39.642Z", + "update_at": "2024-02-23T00:43:39.642Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + 
"isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xbcD29dE615e898c76dc514D5DD7461CF0Be72245": 1 }, + "asks_updated": "2024-05-11T06:31:48.292488587Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f678e7bc372ce719b97f06", + "name": "lmsys/vicuna-13b-v1.5", + "display_name": "Vicuna v1.5 (13B)", + "display_type": "chat", + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "license": "llama2", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-05T00:40:07.763Z", + "update_at": "2023-09-05T00:40:07.763Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x8C25c0cAC3C50A94Fa1444a843BD3ab684640fc0": 1 }, + "asks_updated": "2024-05-11T12:20:14.699866977Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 142.53333333333333, + "throughput_out": 23.266666666666666, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.09090909090909091, + "qps": 0.06666666666666667, + "throughput_in": 142.53333333333333, + "throughput_out": 23.266666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "652da26579174a6bc507647f", + "name": "lmsys/vicuna-7b-v1.5", + "display_name": "Vicuna v1.5 (7B)", + "display_type": "chat", + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/lmsys/vicuna-7b-v1.5", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 6738415616, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["", "USER:"], + "add_generation_prompt": true, + "prompt_format": "USER: {prompt}\nASSISTANT: Hello!", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-16T20:51:49.194Z", + "update_at": "2023-10-16T20:51:49.194Z", + "instances": [{ "avzone": "us-east-2a", 
"cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x9d8f9Db61974247B3743b4492f24C424d6Ec9647": 1 }, + "asks_updated": "2024-05-11T12:20:55.695488101Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd9de620478cfa144258", + "name": "meta-llama/Llama-2-13b-chat-hf", + "display_name": "LLaMA-2 Chat (13B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "add_generation_prompt": true, + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:00:54.436Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x582Ee7216416721CF6101f0A37098C2741824E4B": 1 }, + "asks_updated": "2024-05-10T17:20:38.284283484Z", + "gpus": { "": 0 }, + "qps": 1.2, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 173.4, + "throughput_out": 90.66666666666667, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0.1351851851851851, + "qps": 1.2, + "throughput_in": 173.4, + "throughput_out": 90.66666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd03e620478cfa144255", + "name": "meta-llama/Llama-2-13b-hf", + "display_name": "LLaMA-2 (13B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-13b-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:07:52.318Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x4d41543337D4c322a31a0F9913af3C8708876249": 1 }, + "asks_updated": "2024-05-10T21:29:01.763024089Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd95e620478cfa144257", + "name": "meta-llama/Llama-2-70b-chat-hf", + "display_name": "LLaMA-2 Chat (70B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-70b-chat-hf", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "add_generation_prompt": true, + "chat_template_name": "llama" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2024-04-19T01:11:44.938Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x8E20042a6661ccC893087dE6B593f6F1998769dE": 1, + "0xcd4a3777cA2A18Fe8AebDc19A9411c799a8282DC": 1 + }, + "asks_updated": "2024-05-11T01:00:59.944127024Z", + "gpus": { "": 0 }, + "qps": 0.4666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 419.4, + "throughput_out": 118, + "error_rate": 44.2, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.15625, + "qps": 0.4666666666666667, + "throughput_in": 419.4, + "throughput_out": 118, + "error_rate": 44.2, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd0ee620478cfa144256", + "name": "meta-llama/Llama-2-70b-hf", + "display_name": "LLaMA-2 (70B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-70b-hf", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xB7F462fEd161Ff92f48aaF2302C2a19fA01FdeB4": 1 }, + "asks_updated": "2024-05-11T01:01:02.822830948Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dda7e620478cfa144259", + "name": "meta-llama/Llama-2-7b-chat-hf", + "display_name": "LLaMA-2 Chat (7B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "add_generation_prompt": true, + "chat_template_name": "llama" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "instances": [ + { "avzone": "us-central-1a", "cluster": "sassyseal" }, + { "avzone": "us-central-2a", "cluster": "jollyllama" }, + { "avzone": "us-east-2a", "cluster": "jumpyjackal" } + ], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x05655a9b3C902ceC9a13CfB61bc8f1FAfCdE7Aa8": 1, + "0x0c409751A39422fb09dbd0DB2EE0a2E69Bb29f40": 1, + "0x2701d6319108F711a8e435E3778340E359b8eaEd": 1 + }, + "asks_updated": "2024-05-11T03:05:24.933183344Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1, + "throughput_out": 0.6, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.05000000000000001, + "qps": 0.06666666666666667, + "throughput_in": 1, + "throughput_out": 
0.6, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6db78e620478cfa144254", + "name": "meta-llama/Llama-2-7b-hf", + "display_name": "LLaMA-2 (7B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-7b-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x5d723e1Ad99BDdd03b5C442fd5b487a86Dc42c48": 1 }, + "asks_updated": "2024-05-10T21:28:57.525357288Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620daf44b2da307838b7cf1", + "name": "meta-llama/Llama-3-70b-chat-hf", + "display_name": "Meta Llama 3 70B Instruct", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. 
The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "num_parameters": 70000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-04-18T08:33:56.492Z", + "update_at": "2024-04-24T19:06:49.423Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [ + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-central-6a", "cluster": "mirthfulmonkey" } + ], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 20, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x005c07c763D13C42c16bcBF1843a188C91BeAFE4": 1, + "0x0Af678C6EBA809c95e3865762EC5A5ED93760CDc": 1, + "0x1b1121F2b72c2B4A0B7E510eCC54d6258A1d9F37": 1, + "0x1b4202742bb1c8f9d489adb4E2E3Fe632306FC81": 1, + "0x46b05D3f5674618344647C66538f94Ce85447310": 1, + "0x5E700261AE0a2b4F9D17C30558dfB000ddC55443": 1, + "0x5eD6330f693ee11865F36c8B2DE699C7A7D39a63": 1, + "0x680d04EFD3fA4E594AAEb5d0C04ce9F1090dFc3F": 1, + "0x72677CcC420192765c84cC20A6084F27f96Ee10D": 1, + "0x75B477C1E0F49DAc7D4be39C49Ec61BA67c376F2": 1, + "0x7D3BFcCbC57876224E5Bc1a212303B65bd370Ac5": 1, + "0x83dEe7D7B381E647511665f7AC2b2d70172B0E92": 1, + "0xA9B22801f41A96f4006d8c68B21207982d042201": 1, + "0xC5C7c6c638f24C9A60c5CA6DB4b373aa03209677": 1, + "0xEcc0D7cA34f421465Dc446C26A7614Dc38462693": 1, + "0xF573A972dA992d7a245B77eA3fB6c1B638f5Ff76": 1, + "0xFE26662735641dE91AEd95adb2c76C7DcF0Edc3c": 1, + "0xa191fbB16Af2E4A65714d20E5b1Eef3324DF3395": 1, + "0xbbB6B83dc920A8644EfB8e146d894AB64e96bA56": 1, + "0xc1d639C835f3c106DEE8e2c6591E6FC39ec0029D": 1, + "0xdcd96C00e09261DBab13B1F011DF7B0F5C1CeA92": 1, + "0xe7f47dD4666f588b91B1C7E605c4c7E2A08bbBD9": 1, + "0xeA0069EF1527e8Cd91C949f6965405B805852d16": 1 + }, + "asks_updated": "2024-05-11T12:26:15.222176169Z", + "gpus": { "": 0 }, + "qps": 12.4, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 11489.666666666666, + "throughput_out": 2889.3333333333335, + "error_rate": 0.06666666666666667, + "retry_rate": 0.13333333333333333, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.10318060911173708, + "qps": 6, + "throughput_in": 5823.4, + "throughput_out": 1358.7333333333333, + "error_rate": 0.06666666666666667, + "retry_rate": 0 + }, + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + 
"capacity": 0.11713261648745518, + "qps": 2.1333333333333333, + "throughput_in": 1762.7333333333333, + "throughput_out": 643.2, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.14247052101189547, + "qps": 2.7333333333333334, + "throughput_in": 2837, + "throughput_out": 670.0666666666667, + "error_rate": 0, + "retry_rate": 0.13333333333333333 + }, + { + "avzone": "us-central-6a", + "cluster": "mirthfulmonkey", + "capacity": 0.12173566741121672, + "qps": 1.5333333333333334, + "throughput_in": 1066.5333333333333, + "throughput_out": 217.33333333333334, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620b8bf4b2da307838b7cf0", + "name": "meta-llama/Llama-3-8b-chat-hf", + "display_name": "Meta Llama 3 8B Instruct", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T06:07:59.041Z", + "update_at": "2024-04-24T19:14:26.075Z", + "instances": [ + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-east-1a", "cluster": "happypiglet" } + ], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x51ED0439B7c9e057aCF5357ec96311b9Ad479069": 1, + "0xa463152B8228A26253897AE01A8C252692B48ae7": 1, + "0xc84A8565C896870EE5ab16688B240F4c7625F5Bb": 1 + }, + "asks_updated": "2024-05-10T16:26:56.562519239Z", + "gpus": { "": 0 }, + "qps": 7.466666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 6643.066666666667, + "throughput_out": 382.8666666666667, + "retry_rate": 3.0666666666666664, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.034627474728759836, + "qps": 4.8, + "throughput_in": 4375, + "throughput_out": 238.06666666666666, + "error_rate": 0, + "retry_rate": 1.6 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.0434983732441059, + "qps": 2.6666666666666665, + "throughput_in": 2268.0666666666666, + "throughput_out": 144.8, + "error_rate": 0, + "retry_rate": 
1.4666666666666666 + }, + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620db1d4b2da307838b7cf3", + "name": "meta-llama/Llama-3-8b-hf", + "display_name": "Meta Llama 3 8B", + "display_type": "language", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": null, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T08:34:37.676Z", + "update_at": "2024-04-18T09:12:37.169Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x050037E2D27C826eC7023868FD2A7bc3d3A76329": 1, + "0x74049692cFE55bA343E3a4AEA34Bc1Bef566683D": 1, + "0xF7C536aD4Bb0F90ED75eAEA7625FD5F16d590a47": 1 + }, + "asks_updated": "2024-05-11T06:51:42.031768854Z", + "gpus": { "": 0 }, + "qps": 8.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1792.0666666666666, + "throughput_out": 67.8, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.04535547483134403, + "qps": 8.6, + "throughput_in": 1792.0666666666666, + "throughput_out": 67.8, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "66215c615f70248d1cd89d9f", + "name": "meta-llama/LlamaGuard-2-8b", + "display_name": "Meta Llama Guard 2 8B", + "display_type": "language", + "description": null, + "license": "Llama-3 (Other)", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "num_parameters": 8000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": null, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T08:36:20.125Z", + "update_at": "2024-04-18T09:12:23.195Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x596711d7a0070782D77096054AFFFEe58A8Bd103": 1, + "0x711eAb4579879484478CE6b8cF03BfFc05C00352": 1, + "0xDD20d200A273BB014f5bA9E2f35911318e491Fc7": 1, + "0xd2492F50fC64e43df995B72310C4E6C66123A3eE": 1 + }, + "asks_updated": "2024-05-11T06:51:32.042258521Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + 
"cluster": "jumpyjackal", + "capacity": 0.06422764227642279, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620db134b2da307838b7cf2", + "name": "meta-llama/Meta-Llama-3-70B", + "display_name": "Meta Llama 3 70B", + "display_type": "language", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-70B", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 70000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": null, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-04-18T08:34:27.131Z", + "update_at": "2024-04-18T08:34:27.131Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x64fdF920b9FD14b19eE5E26722815b611A9969f6": 1 }, + "asks_updated": "2024-05-10T18:50:48.909389392Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.03333333333333333, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "661d747e2bfa86bd832690c1", + "name": "microsoft/WizardLM-2-8x22B", + "display_name": "WizardLM-2 (8x22B)", + "display_type": "chat", + "description": "WizardLM-2 8x22B is Wizard's most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models.", + "license": "apache-2.0", + "link": "https://huggingface.co/microsoft/WizardLM-2-8x22B", + "creator_organization": "microsoft", + "pricing_tier": "Featured", + "num_parameters": 141000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 65536, + "owner_userid": null, + "config": { + "prompt_format": null, + "stop": [""], + "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ message['content'] + ' ' }}{% elif message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + eos_token + '\\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT: ' }}{% endif %}", + "add_generation_prompt": true, + "bos_token": "", + "eos_token": "" + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "created_at": "2024-04-15T18:39:58.959Z", + "update_at": "2024-04-15T18:39:58.959Z", + "instances": [ + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-east-2a", "cluster": "jumpyjackal" } + ], + "isPrivate": false, + "access_control": [], + 
"isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x2C8A1eFdc1D636f96e2Fea8B19E54B8a7aD021b2": 1, + "0xBd84E6F6Cf17E934BABcEa323b37fEe12b5B954c": 1, + "0xe82fd7645e8520bbB23989fda5d89B3014089d91": 1 + }, + "asks_updated": "2024-05-11T05:55:32.53293869Z", + "gpus": { "": 0 }, + "qps": 0.33333333333333337, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1390.4666666666667, + "throughput_out": 79.13333333333333, + "error_rate": 0.13333333333333333, + "retry_rate": 0.7333333333333334, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 1.6833333333333336, + "qps": 0.2, + "throughput_in": 1100.6666666666667, + "throughput_out": 38.333333333333336, + "error_rate": 0.06666666666666667, + "retry_rate": 0.4666666666666667 + }, + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 1.1199999999999999, + "qps": 0.13333333333333333, + "throughput_in": 289.8, + "throughput_out": 40.8, + "error_rate": 0.06666666666666667, + "retry_rate": 0.26666666666666666 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b40661251b2ff9f146d8ba", + "name": "microsoft/phi-2", + "display_name": "Microsoft Phi-2", + "display_type": "language", + "description": "Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value)", + "license": "mit", + "link": "https://huggingface.co/microsoft/phi-2", + "creator_organization": "Microsoft", + "pricing_tier": "Featured", + "num_parameters": 2700000000, + "release_date": "2024-01-26T19:22:09.533Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-01-26T19:22:09.533Z", + "update_at": "2024-01-26T19:23:46.072Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x87392e41b7E545004263879B84b6142a49C5fF49": 1 }, + "asks_updated": "2024-05-11T02:58:55.400693381Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 39.666666666666664, + "throughput_out": 96.86666666666666, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0.06666666666666667, + "throughput_in": 39.666666666666664, + "throughput_out": 96.86666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6514c873829715ded9cd17b1", + "name": "mistralai/Mistral-7B-Instruct-v0.1", + "display_name": "Mistral (7B) Instruct", + "display_type": "chat", + "description": "instruct fine-tuned version of Mistral-7B-v0.1", + "license": "Apache-2", + "creator_organization": "mistralai", + "hardware_label": "2x A100 80GB", + "num_parameters": 7241732096, + "release_date": "2023-09-27T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + 
"context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["[/INST]", ""], + "prompt_format": "[INST] {prompt} [/INST]", + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-09-28T00:27:31.815Z", + "update_at": "2023-10-12T01:13:51.840Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 5, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x2940b4a8aC66Ea56De5E30E1b8117b1A2840183C": 1, + "0x8Ec3cC96947C568792b81B26ed32588F74EEA967": 1, + "0x8fdd35098544851F45a1AF21dE3F715aBaE775D3": 1, + "0xD2af9bC18606b1851EC31D25A70634399eeFa07f": 1, + "0xd2A54B882E5b8157aFdbaf0002a046420b316773": 1 + }, + "asks_updated": "2024-05-11T11:39:13.802166171Z", + "gpus": { "": 0 }, + "qps": 7.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1363.1333333333334, + "throughput_out": 543.4666666666667, + "retry_rate": 0.06666666666666667, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.1781045751633987, + "qps": 7.6, + "throughput_in": 1363.1333333333334, + "throughput_out": 543.4666666666667, + "error_rate": 0, + "retry_rate": 0.06666666666666667 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65776c7d6923087ddd5a660a", + "name": "mistralai/Mistral-7B-Instruct-v0.2", + "display_name": "Mistral (7B) Instruct v0.2", + "display_type": "chat", + "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.", + "license": "apache-2.0", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama", + "tools_template": "{{ 'If you need to invoke any of the following functions:\n' + tools + '\nplease respond in the following JSON format:\n[\n\n {\n \"name\": \"the name of the function to be invoked\",\n \"arguments\": {\"key1\": \"value1\", \"key2\": \"value2\", ...}\n }\n]\nIf any required arguments are missing, please ask for them without JSON function calls.\nIf the instruction does not necessitate a function call, please provide your response in clear, concise natural language.\n\n' + message['content'] }}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-12-11T20:09:33.627Z", + "update_at": "2023-12-11T20:09:33.627Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "hardware_label": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xBfdCCF308cFc94E79C8E8547B98A908aEAE2378e": 1 }, + "asks_updated": "2024-05-11T11:45:09.032835704Z", + "gpus": { "": 0 }, + "qps": 1.6666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 
2454.866666666667, + "throughput_out": 136.66666666666666, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.18639455782312941, + "qps": 1.6666666666666667, + "throughput_in": 2454.866666666667, + "throughput_out": 136.66666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6514c6ee829715ded9cd17b0", + "name": "mistralai/Mistral-7B-v0.1", + "display_name": "Mistral (7B)", + "display_type": "language", + "description": "7.3B parameter model that outperforms Llama 2 13B on all benchmarks, approaches CodeLlama 7B performance on code, Uses Grouped-query attention (GQA) for faster inference and Sliding Window Attention (SWA) to handle longer sequences at smaller cost", + "license": "Apache-2", + "creator_organization": "mistralai", + "hardware_label": "2x A100 80GB", + "num_parameters": 7241732096, + "release_date": "2023-09-27T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""], "prompt_format": "{prompt}", "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-09-28T00:21:02.330Z", + "update_at": "2023-09-28T00:21:02.330Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x5a5E928538914B96C7EC31617cD026F8C92F7ad8": 1 }, + "asks_updated": "2024-05-10T15:42:13.775244857Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620059786c156450dc1e445", + "name": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "display_name": "Mixtral-8x22B Instruct v0.1", + "display_type": "chat", + "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.", + "license": "apache-2.0", + "link": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "num_parameters": 141000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 65536, + "owner_userid": null, + "config": { + "stop": ["", "[/INST]"], + "chat_template": "{{bos_token}}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\n' + system_message + '\n<>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\n' + content + '\n<>\n\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content + ' ' + eos_token }}{% endif %}{% endfor %}", + "bos_token": "", + "eos_token": "" + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "created_at": 
"2024-04-17T17:23:35.226Z", + "update_at": "2024-05-03T01:20:25.932Z", + "instances": [ + { "avzone": "us-central-6a", "cluster": "mirthfulmonkey" }, + { "avzone": "eu-central-1a", "cluster": "merrymeerkat" }, + { "avzone": "us-south-1a", "cluster": "mustymarfa" } + ], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x0d23D3C623ed85942E3e521C45f6513161F7F97d": 1, + "0x1beCfdc13118Ff40C81de8eD43E375C87C95212D": 1, + "0x2da9932EA4909E6d2CDB6b8E551a690F6c054c77": 1, + "0xB2091d3D7166e8BA28a835AF2a2Ec4d71e774f8D": 1, + "0xF081B01E37A100ff8E1ef380C6D8Dd29098355D2": 1 + }, + "asks_updated": "2024-05-10T22:34:03.05610807Z", + "gpus": { "": 0 }, + "qps": 0.4666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 214.59999999999997, + "throughput_out": 62.33333333333333, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulmonkey", + "capacity": 0.125, + "qps": 0.2, + "throughput_in": 92.33333333333333, + "throughput_out": 26.4, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "eu-central-1a", + "cluster": "merrymeerkat", + "capacity": 0.16666666666666666, + "qps": 0.06666666666666667, + "throughput_in": 30.4, + "throughput_out": 8.866666666666667, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0, + "qps": 0.2, + "throughput_in": 91.86666666666666, + "throughput_out": 27.066666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "66165fa701f2f8a98997bf8e", + "name": "mistralai/Mixtral-8x22B", + "display_name": "Mixtral-8x22B", + "display_type": "language", + "description": "The Mixtral-8x22B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "license": "apache-2.0", + "link": "", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "num_parameters": 138000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 65536, + "owner_userid": null, + "config": { + "prompt_format": null, + "stop": [""], + "chat_template_name": null, + "chat_template": null + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "created_at": "2024-04-10T09:45:11.291Z", + "update_at": "2024-04-10T09:45:11.291Z", + "instances": [{ "avzone": "us-central-5b", "cluster": "blusterybull" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xB5c9cBC845ecB19eF96AfD7c685C249063D045B9": 1 }, + "asks_updated": "2024-05-10T22:30:33.867778162Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 765.5333333333333, + "throughput_out": 17.733333333333334, + "stats": [ + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.14814814814814814, + "qps": 0.13333333333333333, + "throughput_in": 765.5333333333333, + "throughput_out": 17.733333333333334, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 
0 }, + "_id": "6577af4434e6c1e2bb5283d8", + "name": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "display_name": "Mixtral-8x7B Instruct v0.1", + "display_type": "chat", + "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "license": "apache-2.0", + "link": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}", + "add_generation_prompt": true + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2023-12-12T00:54:28.108Z", + "update_at": "2024-02-08T07:58:24.624Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [ + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-central-6a", "cluster": "mirthfulproxy2" } + ], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 9, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x087B53d30E40D00f2f9FB6b5320F869b0440b3d2": 1, + "0x1f9b37D43762A2E68f79f27037970F252Ae9dc72": 1, + "0x278DE515De2340c72109ef8DaEf2142cD50dE05a": 1, + "0x304C274001CFe1eE95a69F28aC7Bd2DE696Fe31F": 1, + "0x331ad91912c531dCC1c9dF21d624D05A83FA8798": 1, + "0x3Fb77Dfc9Fb62f547C877eeD099836F714862e75": 1, + "0x6E7Bea97F507e915D455f7480a12BFFbD50f8F0B": 1, + "0xDc4d873003AE654ed69d4B2c460d9525F0B82322": 1, + "0xEdb6fdfbcb1Fb0438275066e5314D44252A54A5c": 1, + "0xd40bD5046cfDC4AcB83DD0c37c0Bae8761c77785": 1, + "0xfa3161803d23a65ffcD5f31d1aA17e8A77c9F416": 1 + }, + "asks_updated": "2024-05-11T00:35:53.627294183Z", + "gpus": { "": 0 }, + "qps": 3, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 11466.333333333334, + "throughput_out": 282.9333333333333, + "stats": [ + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.07692307692307691, + "qps": 0.8, + "throughput_in": 1433.1333333333334, + "throughput_out": 69.33333333333333, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.09677419354838712, + "qps": 0.9333333333333333, + "throughput_in": 4351.6, + "throughput_out": 78, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0.08333333333333331, + "qps": 1.2666666666666666, + "throughput_in": 5681.6, + "throughput_out": 135.6, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6577bf1034e6c1e2bb5283d9", + "name": "mistralai/Mixtral-8x7B-v0.1", + "display_name": "Mixtral-8x7B v0.1", + "display_type": "language", + "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "license": "apache-2.0", + "link": "https://huggingface.co/mistralai/Mixtral-8x7B-v0.1", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + 
"show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2023-12-12T02:01:52.674Z", + "update_at": "2024-02-08T07:58:39.848Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-central-6a", "cluster": "mirthfulproxy2" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xc261f35de549E945122BFd444d947873cb8ca48c": 1 }, + "asks_updated": "2024-05-10T15:20:17.076895388Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "657b7a2a84ef58c3562de91e", + "name": "openchat/openchat-3.5-1210", + "display_name": "OpenChat 3.5", + "display_type": "chat", + "description": "A merge of OpenChat 3.5 was trained with C-RLFT on a collection of publicly available high-quality instruction data, with a custom processing pipeline.", + "license": "apache-2.0", + "link": "https://huggingface.co/openchat/openchat-3.5-1210", + "creator_organization": "OpenChat", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "7000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "chat_template": "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'] + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}", + "stop": ["<|end_of_turn|>", ""], + "add_generation_prompt": true, + "bos_token": "", + "prompt_format": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-12-14T21:56:58.576Z", + "update_at": "2023-12-14T21:56:58.576Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x7c2e432720fC11Cd177eFf01BD7Fb55B705EFB2E": 1 }, + "asks_updated": "2024-05-10T16:30:29.082656593Z", + "gpus": { "": 0 }, + "qps": 0.3333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 521.8, + "throughput_out": 51.333333333333336, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.03667953667953668, + "qps": 0.3333333333333333, + "throughput_in": 521.8, + "throughput_out": 51.333333333333336, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aced5c227f790586239d2b", + "name": "prompthero/openjourney", + "display_name": "Openjourney v4", + "display_type": "image", + "description": "An open source Stable Diffusion model fine tuned model on Midjourney images. 
", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/prompthero/openjourney", + "creator_organization": "Prompt Hero", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { "height": 512, "width": 512, "steps": 20, "number_of_images": 2, "seed": 42 }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:49:16.586Z", + "update_at": "2023-07-11T05:49:16.586Z", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 1, + "num_running": 1, + "asks": { + "0x5C5b60Ea2C7046FDdf7F7be3853d046301334a85": 1, + "0xB2bFeaa446Cc0376249ed2d7a8f5C32E0705e556": 1 + }, + "asks_updated": "2024-05-11T09:07:32.233340141Z", + "gpus": { "NVIDIA A40": 2 }, + "options": { "input=text,image": 2 }, + "qps": 0.013067961, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.23658435 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece1", + "name": "runwayml/stable-diffusion-v1-5", + "display_name": "Stable Diffusion 1.5", + "display_type": "image", + "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/runwayml/stable-diffusion-v1-5", + "creator_organization": "Runway ML", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { "height": 512, "width": 512, "steps": 20, "number_of_images": 2, "seed": 42 }, + "created_at": "2023-06-23T20:22:43.572Z", + "update_at": "2023-06-23T20:22:43.572Z", + "access": "", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x98D41CFC96e488D9810431B65Aa98EBfc87b73c8": 1 }, + "asks_updated": "2024-05-11T02:38:42.957010375Z", + "gpus": { "NVIDIA A40": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.015545072, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.23383653 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65460075c5ce2e5fa70d6721", + "name": "sentence-transformers/msmarco-bert-base-dot-v5", + "display_name": "Sentence-BERT", + "display_type": "embedding", + "description": "A sentence-transformers model: it maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search.", + "license": "apache-2.0", + "link": "https://huggingface.co/sentence-transformers/msmarco-bert-base-dot-v5", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 110000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 512, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-04T08:27:33.867Z", + "update_at": "2023-12-22T03:15:44.832Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + 
"num_running": 0, + "asks": { + "0x1a1b0dB24Fdfd5E05AF9177A80fbB0C049a3b63b": 1, + "0x662c7EE2ca9D3D4fAbcEE2286C1bbc5f24CA02fD": 1, + "0x834Dfa4EeF072100CcBC96fA3871d6f62Ce02455": 1, + "0xfE0CBc639aB99C5995B77cBd6aCCB0F29208186D": 1 + }, + "asks_updated": "2024-05-11T03:04:31.159265923Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b454f3d9877b0bd1376470", + "name": "snorkelai/Snorkel-Mistral-PairRM-DPO", + "display_name": "Snorkel Mistral PairRM DPO (7B)", + "display_type": "chat", + "description": "A state-of-the-art model by Snorkel AI, DPO fine-tuned on Mistral-7B", + "license": "apache-2.0", + "creator_organization": "Snorkel AI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2024-01-27T00:57:23.638Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-01-27T00:57:23.638Z", + "update_at": "2024-01-27T14:24:41.745Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "hardware_label": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x905d9333Bf36FC9fD26b130adaaEe6f5Bd4E800f": 1 }, + "asks_updated": "2024-05-10T18:09:23.39444282Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.1111111111111111, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acef00227f790586239d3b", + "name": "stabilityai/stable-diffusion-2-1", + "display_name": "Stable Diffusion 2.1", + "display_type": "image", + "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", + "license": "openrail++", + "link": "https://huggingface.co/stabilityai/stable-diffusion-2-1", + "creator_organization": "Stability AI", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "created_at": "2023-06-23T20:22:43.572Z", + "update_at": "2023-06-23T20:22:43.572Z", + "access": "", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xC9494f3A014EAC6DD43De5b03E03364F1AcC9ea7": 1 }, + "asks_updated": "2024-05-11T02:40:07.5915129Z", + "gpus": { "NVIDIA A100 80GB PCIe": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.02694962, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 2.008309 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, 
+ "_id": "64c9890c689aa3b286cfcff9", + "name": "stabilityai/stable-diffusion-xl-base-1.0", + "display_name": "Stable Diffusion XL 1.0", + "display_type": "image", + "description": "A text-to-image generative AI model that excels at creating 1024x1024 images.", + "license": "openrail++", + "link": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0", + "creator_organization": "Stability AI", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { + "seed": 1000, + "height": 1024, + "width": 1024, + "steps": 40, + "number_of_images": 4, + "optimized": { + "512x512": "-512-576-1024", + "576x1024": "-512-576-1024", + "1024x576": "-512-576-1024", + "1024x1024": "-512-576-1024" + } + }, + "created_at": "2023-08-01T22:37:00.851Z", + "update_at": "2023-08-01T22:37:00.851Z", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x2E595c6ee5e62FeFF9f426b239a2fB0970476593": 1 }, + "asks_updated": "2024-05-11T02:42:33.99917055Z", + "gpus": { "NVIDIA A100 80GB PCIe": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.018970164, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.9324918 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "653c053fd9679a84df55c4e7", + "name": "teknium/OpenHermes-2-Mistral-7B", + "display_name": "OpenHermes-2-Mistral (7B)", + "display_type": "chat", + "description": "State of the art Mistral Fine-tuned on extensive public datasets", + "license": "Apache-2", + "creator_organization": "teknium", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 7241732096, + "release_date": "2023-10-27T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "pre_prompt": "<|im_start|>system\nYou are thoughtful, helpful, polite, honest, and friendly<|im_end|>\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-27T18:45:19.307Z", + "update_at": "2023-10-27T23:53:05.438Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 2, + "num_running": 2, + "asks": { "0x24e7c0F944a664e4be6890a13Ce3cB0b930a2d9b": 3 }, + "asks_updated": "2024-05-10T15:23:33.984301148Z", + "gpus": { "": 0 }, + "qps": 0.3333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 583.4, + "throughput_out": 66.66666666666667, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.21153846153846154, + "qps": 0.3333333333333333, + "throughput_in": 583.4, + "throughput_out": 66.66666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "655667fe6664bf7229b2dc6c", + "name": "teknium/OpenHermes-2p5-Mistral-7B", + "display_name": "OpenHermes-2.5-Mistral (7B)", + "display_type": "chat", + 
"description": "Continuation of OpenHermes 2 Mistral model trained on additional code datasets", + "license": "Apache-2", + "creator_organization": "teknium", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 7241732096, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-11-16T19:05:34.976Z", + "update_at": "2023-11-16T19:12:24.883Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x0C761d48a95cFbc1969e97099a212D033AfE4c32": 1, + "0x478820b73D0745eBFB0E01a0de2c989dA2bC2fD8": 1 + }, + "asks_updated": "2024-05-11T07:42:07.630477632Z", + "gpus": { "": 0 }, + "qps": 2.6666666666666665, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3478.8, + "throughput_out": 391.1333333333333, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.46472019464720205, + "qps": 2.6666666666666665, + "throughput_in": 3478.8, + "throughput_out": 391.1333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece3", + "name": "togethercomputer/GPT-JT-Moderation-6B", + "display_name": "GPT-JT-Moderation (6B)", + "display_type": "language", + "description": "This model can be used to moderate other chatbot models. Built using GPT-JT model fine-tuned on Ontocord.ai's OIG-moderation dataset v0.1.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/GPT-JT-Moderation-6B", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 6700000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "chat_template": "Possible labels:\n1. casual\n2. needs caution\n3. needs intervention\n4. possibly needs caution\n5. probably needs caution\nInput: Hi!\nOutput: casual\nInput: I want to kill my cat.\nOutput: needs intervention\nInput: How to poison my dog?\nOutput: needs intervention\nInput: I don't want to talk to my brother\nOutput: probably needs caution\nInput: Today we had a disagreement and \nwe got into a screaming fight. I don't want to talk.\nOutput: probably needs caution\nInput: My brother is being an asshole... 
Fuck him.\nOutput: needs caution\nInput: Hello.\nOutput: casual\nInput: {% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}\nOutput:", + "safety_label": "casual", + "safety_config": { "temperature": 0.2, "top_p": 1, "max_tokens": 10, "stop": ["\n"] } + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.657Z", + "update_at": "2023-06-23T20:22:43.657Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "testytiger" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3B2a3D16B0207aA9F14726eeE8cD0b9b388209A2": 1 }, + "asks_updated": "2024-05-10T17:30:19.429822841Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "testytiger", + "capacity": 0.05, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64c28e8742fa06a9511509d1", + "name": "togethercomputer/LLaMA-2-7B-32K", + "display_name": "LLaMA-2-32K (7B)", + "display_type": "language", + "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations.", + "license": "Meta license", + "link": "https://huggingface.co/togethercomputer/LLaMA-2-7B-32K", + "creator_organization": "Together", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { "stop": ["\n\n\n\n", "<|endoftext|>"], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-27T15:34:31.581Z", + "update_at": "2023-08-17T17:07:36.346Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x371EAf96c8Ee3BA499F0288c75c75d51112b2527": 1 }, + "asks_updated": "2024-05-11T09:43:39.29703399Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64de96090d052d10425df3c9", + "name": "togethercomputer/Llama-2-7B-32K-Instruct", + "display_name": "LLaMA-2-7B-32K-Instruct (7B)", + "display_type": "chat", + "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations, instruction tuned by Together", + "license": "Meta license", + "link": "https://huggingface.co/togethercomputer/Llama-2-7B-32K-Instruct", + "creator_organization": "Together", + "hardware_label": "2X A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST]\n {prompt} \n[/INST]\n\n", + "stop": ["[INST]", "\n\n"], + "chat_template_name": "llama" + }, 
+ "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-27T15:34:31.581Z", + "update_at": "2023-08-17T17:07:36.346Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isFinetuned": false, + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x5EE34cdD225705f90B9e7F5CD9502B856f6c87BA": 1, + "0x6B5fee8e0895Bfa4AC5E9CCE41A6675899BEd4DF": 1, + "0x6fD51Dd59783f9FD230d1c7774AD36e9B7581857": 1, + "0x97c68E4Be603a6c4Bd4DDFa79B250B4EA92301cE": 1, + "0xa9dA26E9cd4a33BEd521d35f632c9b7bf0fBB712": 1, + "0xfa66a48eA14D80e094D57E0ECdF22Fc0B397D193": 1 + }, + "asks_updated": "2024-05-11T10:28:34.70870094Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 1, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aeceb", + "name": "togethercomputer/RedPajama-INCITE-7B-Base", + "display_name": "RedPajama-INCITE (7B)", + "display_type": "language", + "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).", + "descriptionLink": "https://www.together.xyz/blog/redpajama-models-v1", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Base", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "6857302016", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.033Z", + "update_at": "2023-06-23T20:22:44.033Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3665A75f9d8F32a7721B1b43c4CA2eC18F7bdDD3": 1 }, + "asks_updated": "2024-05-11T02:42:29.667700181Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aeced", + "name": "togethercomputer/RedPajama-INCITE-7B-Chat", + "display_name": "RedPajama-INCITE Chat (7B)", + "display_type": "chat", + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-7B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat", + "creator_organization": "Together", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "6857302016", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "prompt_format": ": {prompt}\n:", + "stop": [""], + "chat_template_name": "gpt", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + 
"created_at": "2023-06-23T20:22:44.190Z", + "update_at": "2023-06-23T20:22:44.190Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x5e066227A1e7f634cAEFaDc21527340A7E33a8d5": 1 }, + "asks_updated": "2024-05-11T02:42:20.102603502Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecec", + "name": "togethercomputer/RedPajama-INCITE-7B-Instruct", + "display_name": "RedPajama-INCITE Instruct (7B)", + "display_type": "language", + "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-7B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Instruct", + "creator_organization": "Together", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "6857302016", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.083Z", + "update_at": "2023-06-23T20:22:44.083Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xf692d4ef74617ec76153cC5D32C3b8b9bD5D2B2C": 1 }, + "asks_updated": "2024-05-11T02:41:42.647429798Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece5", + "name": "togethercomputer/RedPajama-INCITE-Base-3B-v1", + "display_name": "RedPajama-INCITE (3B)", + "display_type": "language", + "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).", + "descriptionLink": "https://www.together.xyz/blog/redpajama-models-v1", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-Base-3B-v1", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "2775864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.751Z", + "update_at": "2023-06-23T20:22:43.751Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xe03dDF7b87500172ec519A5cf7982166CB27446d": 1 }, + "asks_updated": "2024-05-11T02:39:13.60509537Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + 
"price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece7", + "name": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "display_name": "RedPajama-INCITE Chat (3B)", + "display_type": "chat", + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "2775864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "add_generation_prompt": true, + "prompt_format": ": {prompt}\n:", + "stop": [""], + "chat_template_name": "gpt" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.839Z", + "update_at": "2023-06-23T20:22:43.839Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x4e6FE2a25309efA6b3279d5FacceA7393Bce9d7d": 1 }, + "asks_updated": "2024-05-11T02:42:34.056810771Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece6", + "name": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "display_name": "RedPajama-INCITE Instruct (3B)", + "display_type": "language", + "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-3B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "2775864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.796Z", + "update_at": "2023-06-23T20:22:43.796Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x05DcC82776d9dE9f88714a57489F55Fe629253Df": 1 }, + "asks_updated": "2024-05-11T02:40:57.020944885Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + 
"_id": "65735df36923087ddd5a6607", + "name": "togethercomputer/StripedHyena-Hessian-7B", + "display_name": "StripedHyena Hessian (7B)", + "display_type": "language", + "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", + "license": "Apache-2", + "creator_organization": "Together", + "hardware_label": "H100", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-12-08T18:18:27.005Z", + "update_at": "2023-12-08T19:03:32.567Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x67950b5BFae9be9c326c098be6ED4C6eBfDF21AC": 1 }, + "asks_updated": "2024-05-10T17:29:30.55649053Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.03571428571428571, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65735d536923087ddd5a6606", + "name": "togethercomputer/StripedHyena-Nous-7B", + "display_name": "StripedHyena Nous (7B)", + "display_type": "chat", + "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", + "license": "Apache-2", + "creator_organization": "Together", + "hardware_label": "H100", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["###", ""], + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '### Instruction:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'system' %}{{ '### System:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\\n' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ '### Response:\\n' }}{% endif %}{% endfor %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-12-08T18:15:47.433Z", + "update_at": "2023-12-08T19:03:11.497Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xDf09DC5Df2B0116b09cB52E358e1bAbdE797c383": 1 }, + "asks_updated": "2024-05-10T22:20:02.651692436Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.05263157894736842, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": 
[], "order": 0 }, + "_id": "64ace317227f790586239ce2", + "name": "togethercomputer/alpaca-7b", + "display_name": "Alpaca (7B)", + "display_type": "chat", + "description": "Fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ", + "license": "cc-by-nc-4.0", + "link": "https://huggingface.co/tatsu-lab/alpaca-7b-wdiff", + "creator_organization": "Stanford", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "stop": ["", "###"], + "add_generation_prompt": true, + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:05:27.713Z", + "update_at": "2023-07-11T05:05:27.713Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xCDb16b84A6C85ceEa98b9A423fFc3DB6c94B79ba": 1 }, + "asks_updated": "2024-05-11T02:41:04.792218224Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65df8df823e6726c2d053851", + "name": "togethercomputer/evo-1-131k-base", + "display_name": "Evo-1 Base (131K)", + "display_type": "language", + "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. 
Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/evo-1-131k-base", + "creator_organization": "Together", + "pricing_tier": "Featured", + "num_parameters": 6450000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 131073, + "pricing": { "input": 500, "output": 500, "hourly": 0 }, + "created_at": "2024-02-28T19:48:08.106Z", + "update_at": "2024-02-28T19:48:08.106Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x23e316184d0b6427b720e31f9390617C2094C570": 1 }, + "asks_updated": "2024-05-10T16:39:02.557083364Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65df8d9623e6726c2d053850", + "name": "togethercomputer/evo-1-8k-base", + "display_name": "Evo-1 Base (8K)", + "display_type": "language", + "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. 
Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/evo-1-8k-base", + "creator_organization": "Together", + "pricing_tier": "Featured", + "num_parameters": 6450000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "input": 500, "output": 500, "hourly": 0 }, + "created_at": "2024-02-28T19:46:30.585Z", + "update_at": "2024-04-19T18:58:00.962Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x80D5014da1fBfBe7069d979125Ebc6EFB4e21446": 1 }, + "asks_updated": "2024-05-10T11:50:39.384174189Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6553b8da6664bf7229b2dbfb", + "name": "togethercomputer/m2-bert-80M-2k-retrieval", + "display_name": "M2-BERT-Retrieval-2K", + "display_type": "embedding", + "description": "M2-BERT from the Monarch Mixer paper fine-tuned for retrieval", + "license": "Apache-2", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "num_parameters": 80000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-14T18:13:46.901Z", + "update_at": "2024-02-21T20:06:27.968Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x49c8561e8042ef95A4b011A0AB216d6171aAb80a": 1, + "0xD797d4629f4ec41203c03e9F417A1b26C165429c": 1 + }, + "asks_updated": "2024-05-11T02:57:23.797140598Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6585058be7e2e898e81b5401", + "name": "togethercomputer/m2-bert-80M-32k-retrieval", + "display_name": "M2-BERT-Retrieval-32k", + "display_type": "embedding", + "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/m2-bert-80M-32k-retrieval", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 80000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "pricing": { "hourly": 0, "input": 2, 
"output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-04T17:57:24.532Z", + "update_at": "2023-11-04T17:57:24.532Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x642D1948083206E1B93C913F7540075bAAbdeE09": 1, + "0xac486C2C89f7954b1Eb96281162268cFcCF8b113": 1, + "0xc6B6cfB1A480437a012553De16eD296a73a8fB68": 1 + }, + "asks_updated": "2024-05-11T02:48:07.105736356Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3360.0666666666666, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.18125, + "qps": 0.06666666666666667, + "throughput_in": 3360.0666666666666, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65468604c5ce2e5fa70d6722", + "name": "togethercomputer/m2-bert-80M-8k-retrieval", + "display_name": "M2-BERT-Retrieval-8k", + "display_type": "embedding", + "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/m2-bert-80M-8k-retrieval", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 80000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-04T17:57:24.532Z", + "update_at": "2023-11-04T17:57:24.532Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x3aA308521998f41e8B32704d6572468904c5068c": 1, + "0x3d324467D9c6F36d90247D092f4CF7c3409E4EC7": 1, + "0x5E3b1DA7f7b563d68126FB910d08a1afECdd4365": 1 + }, + "asks_updated": "2024-05-11T02:51:12.377041702Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "657f7552a9c4049b6a42e4c6", + "name": "upstage/SOLAR-10.7B-Instruct-v1.0", + "display_name": "Upstage SOLAR Instruct v1 (11B)", + "display_type": "chat", + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "license": "cc-by-nc-4.0", + "creator_organization": "upstage", + "hardware_label": "A100B", + "pricing_tier": "Featured", + "num_parameters": 10700000000, + "release_date": "2023-12-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{{'<|im_start|>'}}{% if message['role'] == 'user' %}{{'user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'assistant\n' + 
message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'system' %}{{'system\n' + message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + }, + "pricing": { "input": 75, "output": 75 }, + "created_at": "2023-12-17T22:25:22.252Z", + "update_at": "2023-12-17T22:32:58.075Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x6e7b83610040F22561593472a4A239022A6fc7CE": 1 }, + "asks_updated": "2024-05-11T02:25:27.512989257Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.023255813953488372, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace3af227f790586239ce6", + "name": "wavymulder/Analog-Diffusion", + "display_name": "Analog Diffusion", + "display_type": "image", + "description": "Dreambooth model trained on a diverse set of analog photographs to provide an analog film effect. ", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/wavymulder/Analog-Diffusion", + "creator_organization": "Wavymulder", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 0, + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "created_at": "2023-07-11T05:07:59.364Z", + "update_at": "2023-07-11T05:07:59.364Z", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xC830b3583bcA51887185318c0184fbdB622A55f5": 1 }, + "asks_updated": "2024-05-11T03:33:35.26300478Z", + "gpus": { "NVIDIA A40": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.012988208, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.23378775 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "656a79054d805f78df5fd530", + "name": "zero-one-ai/Yi-34B-Chat", + "display_name": "01-ai Yi Chat (34B)", + "display_type": "chat", + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "license": "yi-license", + "creator_organization": "01.AI", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 34000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["<|im_start|>", "<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "chat_template_name": "default" + }, + "pricing": { "input": 200, "output": 200, "base": 0 }, + "created_at": "2023-12-02T00:23:33.685Z", + "update_at": "2023-12-02T00:26:55.827Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x3faF4AfA52f2A5951B4bde877478B0BF4d69a023": 1, + 
"0x450aDd660C1B1fdB2A7f6bDAE850C4850594FbCD": 1, + "0x454Eef2b7f085F0134db5c728ac382aD0c4C9511": 1 + }, + "asks_updated": "2024-05-11T05:55:38.392763822Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 128.73333333333332, + "throughput_out": 34.266666666666666, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.2, + "qps": 0.13333333333333333, + "throughput_in": 128.73333333333332, + "throughput_out": 34.266666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "656fa3548d9fd20968de9ba7", + "name": "zero-one-ai/Yi-34B", + "display_name": "01-ai Yi Base (34B)", + "display_type": "language", + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "license": "yi-license", + "creator_organization": "01.AI", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 34000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "pricing": { "input": 200, "output": 200 }, + "created_at": "2023-12-05T22:25:24.982Z", + "update_at": "2023-12-05T22:51:15.306Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x09c253d0c4aB07a89D2f7d23A57eA31bdc760c54": 1, + "0x964972F1A61F8BAdbD6163b9888D284CC2E054E9": 1, + "0x9B35c58ef3E3425dEa8CBE5f39b8050e40193F68": 1 + }, + "asks_updated": "2024-05-11T12:20:16.94106496Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6570718281b9e1cf0455ec53", + "name": "zero-one-ai/Yi-6B", + "display_name": "01-ai Yi Base (6B)", + "display_type": "language", + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "license": "yi-license", + "creator_organization": "01.AI", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 6000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-12-06T13:05:06.567Z", + "update_at": "2023-12-06T13:07:50.190Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0xB527b0625620ff3AACCEb84008A7A6684E2d6FbA": 1, + "0xf2337a3BA04f483bCb3DbF43584a398e83E20368": 1 + }, + "asks_updated": "2024-05-11T02:56:40.830191037Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, 
+ { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "66313f416fbdf5d304b833d1", + "name": "togethercomputer/Llama-3-8b-chat-hf-int4", + "display_name": "Llama3 8B Chat HF INT4", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T06:07:59.041Z", + "update_at": "2024-04-24T19:14:26.075Z", + "instances": [ + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" } + ], + "isPrivate": true, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6630011e324b0032b64f35a0", + "name": "togethercomputer/Llama-3-8b-chat-hf-int8", + "display_name": "Togethercomputer Llama3 8B Instruct Int8", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. 
The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T06:07:59.041Z", + "update_at": "2024-04-24T19:14:26.075Z", + "instances": [ + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" } + ], + "isPrivate": true, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecbe", + "name": "EleutherAI/pythia-1b-v0", + "display_name": "Pythia (1B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 1000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.925Z", + "update_at": "2023-06-23T20:22:41.925Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "649e1ccca073332e47742415", + "name": "replit/replit-code-v1-3b", + "display_name": "Replit-Code-v1 (3B)", + "display_type": "code", + "description": "replit-code-v1-3b is a 2.7B Causal Language Model focused on Code Completion. 
The model has been trained on a subset of the Stack Dedup v1.2 dataset.", + "license": "", + "link": "", + "creator_organization": "Replit", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "limited", + "num_parameters": 3000000000, + "release_date": "2023-04-26T00:00:00.000Z", + "show_in_playground": "true", + "isFeaturedModel": false, + "context_length": 2048, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-30T00:07:40.594Z", + "update_at": "2023-07-07T20:09:09.965Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecee", + "name": "togethercomputer/Pythia-Chat-Base-7B-v0.16", + "display_name": "Pythia-Chat-Base (7B)", + "display_type": "chat", + "description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.", + "license": "", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "prompt_format": "<human>: {prompt}\n<bot>:", + "stop": ["<human>"], + "chat_template_name": "gpt" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.251Z", + "update_at": "2023-06-23T20:22:44.251Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceada227f790586239d11", + "name": "mosaicml/mpt-7b", + "display_name": "MPT (7B)", + "display_type": "language", + "description": "Decoder-style transformer pretrained from scratch on 1T tokens of English text and code.", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|endoftext|>"], + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:38:34.852Z", + "update_at": "2023-07-15T03:06:20.780Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceb0e227f790586239d12", + "name": "togethercomputer/mpt-30b-chat", + "display_name": "MPT-Chat (30B)", + "display_type": "chat", + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 30000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-07-11T05:39:26.078Z", + "update_at": "2023-07-11T05:39:26.078Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace6df227f790586239cfc", + "name": "google/flan-t5-xl", + "display_name": "Flan T5 XL (3B)", + "display_type": "language", +
"description": "T5 fine-tuned on more than 1000 additional tasks covering also more languages, making it better than T5 at majority of tasks. ", + "license": "", + "link": "", + "creator_organization": "Google", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 512, + "config": { "chat_template_name": "default" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.261Z", + "update_at": "2023-06-23T20:22:42.261Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acebe0227f790586239d17", + "name": "NumbersStation/nsql-6B", + "display_name": "NSQL (6B)", + "display_type": "language", + "description": "Foundation model designed specifically for SQL generation tasks. Pre-trained for 3 epochs and fine-tuned for 10 epochs.", + "license": "", + "creator_organization": "Numbers Station", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 6000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "default" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:42:56.540Z", + "update_at": "2023-07-11T05:42:56.540Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace9ca227f790586239d09", + "name": "togethercomputer/Koala-7B", + "display_name": "Koala (7B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt} GPT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'GPT: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ 'GPT:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:34:02.521Z", + "update_at": "2023-07-11T05:34:02.521Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc0", + "name": "EleutherAI/pythia-6.9b", + "display_name": "Pythia (6.9B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "num_parameters": 6900000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.044Z", + "update_at": "2023-06-23T20:22:42.044Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecb8", + "name": "databricks/dolly-v2-12b", + "display_name": "Dolly v2 (12B)", + "display_type": "chat", + 
"description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "license": "", + "link": "", + "creator_organization": "Databricks", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 12000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["### End"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.607Z", + "update_at": "2023-06-23T20:22:41.607Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecb6", + "name": "databricks/dolly-v2-3b", + "display_name": "Dolly v2 (3B)", + "display_type": "chat", + "description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "license": "", + "link": "", + "creator_organization": "Databricks", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["### End"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.524Z", + "update_at": "2023-06-23T20:22:41.524Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc2", + "name": "EleutherAI/gpt-neox-20b", + "display_name": "GPT-NeoX (20B)", + "display_type": "language", + "description": "Autoregressive language model trained on the Pile. 
Its architecture intentionally resembles that of GPT-3, and is almost identical to that of GPT-J 6B.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 20000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.132Z", + "update_at": "2023-06-23T20:22:42.132Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecbf", + "name": "EleutherAI/pythia-2.8b-v0", + "display_name": "Pythia (2.8B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "num_parameters": 2800000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.975Z", + "update_at": "2023-06-23T20:22:41.975Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acebb2227f790586239d16", + "name": "NousResearch/Nous-Hermes-13b", + "display_name": "Nous Hermes (13B)", + "display_type": "language", + "description": "LLaMA 13B fine-tuned on over 300,000 instructions. Designed for long responses, low hallucination rate, and absence of censorship mechanisms.", + "license": "", + "link": "", + "creator_organization": "Nous Research", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:42:10.444Z", + "update_at": "2023-07-11T05:42:10.444Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace8d1227f790586239d03", + "name": "togethercomputer/guanaco-65b", + "display_name": "Guanaco (65B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. 
Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Supported", + "access": "open", + "num_parameters": 65000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-11T05:29:53.740Z", + "update_at": "2023-07-11T05:29:53.740Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e5", + "name": "togethercomputer/llama-2-7b", + "display_name": "LLaMA-2 (7B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "renamed": "meta-llama/Llama-2-7b-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acf031227f790586239d44", + "name": "lmsys/fastchat-t5-3b-v1.0", + "display_name": "Vicuna-FastChat-T5 (3B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 512, + "config": { + "stop": ["###", "</s>"], + "prompt_format": "### Human: {prompt}\n### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + '\n' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-07-11T06:01:21.713Z", + "update_at": "2023-07-11T06:01:21.713Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea6e227f790586239d0e", + "name": "huggyllama/llama-7b", + "display_name": "LLaMA (7B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture.
The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:36:46.255Z", + "update_at": "2023-07-11T05:36:46.255Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc9", + "name": "OpenAssistant/stablelm-7b-sft-v7-epoch-3", + "display_name": "Open-Assistant StableLM SFT-7 (7B)", + "display_type": "chat", + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", + "license": "", + "link": "", + "creator_organization": "LAION", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["<|endoftext|>"], + "prompt_format": "<|prompter|>{prompt}<|endoftext|><|assistant|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompter|>' + message['content'] + '<|endoftext|>' }}{% else %}{{ '<|assistant|>' + message['content'] + '<|endoftext|>\n' }}{% endif %}{% endfor %}{{ '<|assistant|>' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.425Z", + "update_at": "2023-06-23T20:22:42.425Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc1", + "name": "EleutherAI/pythia-12b-v0", + "display_name": "Pythia (12B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 12000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "gpt" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.091Z", + "update_at": "2023-06-23T20:22:42.091Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceb28227f790586239d13", + "name": "togethercomputer/mpt-7b-chat", + "display_name": "MPT-Chat (7B)", + "display_type": "chat", + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:39:52.024Z", + "update_at": 
"2023-07-11T05:39:52.024Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecbc", + "name": "EleutherAI/gpt-j-6b", + "display_name": "GPT-J (6B)", + "display_type": "language", + "description": "Transformer model trained using Ben Wang's Mesh Transformer JAX. ", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 6000000000, + "release_date": "2021-06-04T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.831Z", + "update_at": "2023-06-23T20:22:41.831Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc8", + "name": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + "display_name": "Open-Assistant Pythia SFT-4 (12B)", + "display_type": "chat", + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", + "license": "", + "link": "", + "creator_organization": "LAION", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 12000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|endoftext|>"], + "prompt_format": "<|prompter|>{prompt}<|endoftext|><|assistant|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompter|>' + message['content'] + '<|endoftext|>' }}{% else %}{{ '<|assistant|>' + message['content'] + '<|endoftext|>\n' }}{% endif %}{% endfor %}{{ '<|assistant|>' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.383Z", + "update_at": "2023-06-23T20:22:42.383Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acf013227f790586239d43", + "name": "lmsys/vicuna-7b-v1.3", + "display_name": "Vicuna v1.3 (7B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. 
Auto-regressive model, based on the transformer architecture.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["</s>"], + "prompt_format": "USER: {prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T06:00:51.553Z", + "update_at": "2023-07-11T06:00:51.553Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5cc", + "name": "Phind/Phind-CodeLlama-34B-Python-v1", + "display_name": "Phind Code LLaMA Python v1 (34B)", + "display_type": "code", + "description": "This model is fine-tuned from CodeLlama-34B-Python and achieves 69.5% pass@1 on HumanEval.", + "license": "", + "creator_organization": "Phind", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 33743970304, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "stop": ["</s>", "###"], + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65ac4e5e75846d9d3ae5b836", + "name": "NumbersStation/nsql-llama-2-7B", + "display_name": "NSQL LLaMA-2 (7B)", + "display_type": "code", + "description": "NSQL is a family of autoregressive open-source large foundation models (FMs) designed specifically for SQL generation tasks", + "link": "", + "creator_organization": "Numbers Station", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2024-01-20T22:51:10.492Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "pricing": { "hourly": 0, "input": 50, "output": 50, "finetune": 0, "base": 0 }, + "created_at": "2024-01-20T22:51:10.492Z", + "update_at": "2024-01-20T22:59:48.333Z", + "access": "", + "license": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf8", + "name": "NousResearch/Nous-Hermes-Llama2-70b", + "display_name": "Nous Hermes LLaMA-2 (70B)", + "display_type": "chat", + "description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "license": "", + "link": "", + "creator_organization": "NousResearch", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 70000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["###", "</s>"], + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:\n", + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if
message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.404Z", + "update_at": "2023-10-24T17:43:39.278Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f67555bc372ce719b97f03", + "name": "WizardLM/WizardLM-70B-V1.0", + "display_name": "WizardLM v1.0 (70B)", + "display_type": "language", + "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities.", + "license": "", + "creator_organization": "WizardLM", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "num_parameters": 70000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt} ASSISTANT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'ASSISTANT:' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-09-05T00:24:53.327Z", + "update_at": "2023-09-05T00:24:53.327Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea57227f790586239d0d", + "name": "huggyllama/llama-65b", + "display_name": "LLaMA (65B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 65000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-11T05:36:23.656Z", + "update_at": "2023-07-11T05:36:23.656Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5ce", + "name": "lmsys/vicuna-13b-v1.5-16k", + "display_name": "Vicuna v1.5 16K (13B)", + "display_type": "chat", + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "license": "", + "creator_organization": "LM Sys", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13015864320, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "USER: {prompt}\nASSISTANT:", + "stop": [""], + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece4", + "name": "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "display_name": "GPT-NeoXT-Chat-Base 
(20B)", + "display_type": "chat", + "description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.", + "license": "", + "link": "", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 20000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "prompt_format": ": {prompt}\n:", + "stop": [""], + "chat_template_name": "gpt" + }, + "max_tokens": 995, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.702Z", + "update_at": "2023-06-23T20:22:43.702Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "657bed666aca120ac2af2fb7", + "name": "HuggingFaceH4/zephyr-7b-beta", + "display_name": "Zephyr-7B-ß", + "display_type": "chat", + "description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.", + "license": "", + "link": "", + "creator_organization": "HuggingFace", + "hardware_label": "2x A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 7241732096, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 32768, + "config": { "stop": ["[INST]", ""], "prompt_format": "[INST] {prompt} [INST]" }, + "created_at": "2023-12-15T06:08:38.925Z", + "update_at": "2023-12-15T06:08:38.925Z", + "isFinetuned": false, + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78eba589782acafe1781f", + "name": "togethercomputer/CodeLlama-13b-Python", + "display_name": "Code Llama Python (13B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-20T22:52:59.177Z", + "renamed": "codellama/CodeLlama-13b-Python-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e7", + "name": "togethercomputer/llama-2-13b", + "display_name": "LLaMA-2 (13B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:07:52.318Z", + "renamed": "meta-llama/Llama-2-13b-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78e89589782acafe1781d", + "name": "togethercomputer/CodeLlama-7b-Instruct", + "display_name": "Code Llama Instruct (7B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "renamed": "codellama/CodeLlama-7b-Instruct-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f0de22caa9e2eb543b373b", + "name": "togethercomputer/guanaco-13b", + "display_name": "Guanaco (13B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. 
Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "A40 48GB", + "pricing_tier": "Supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:29:07.717Z", + "update_at": "2023-07-11T05:29:07.717Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e7934a589782acafe17822", + "name": "togethercomputer/CodeLlama-34b-Python", + "display_name": "Code Llama Python (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "renamed": "codellama/CodeLlama-34b-Python-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceb6f227f790586239d15", + "name": "mosaicml/mpt-7b-instruct", + "display_name": "MPT-Instruct (7B)", + "display_type": "language", + "description": "Designed for short-form instruction following, finetuned on Dolly and Anthropic HH-RLHF and other datasets", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|endoftext|>"], + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:41:03.757Z", + "update_at": "2023-07-11T05:41:03.757Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07ea", + "name": "togethercomputer/llama-2-70b-chat", + "display_name": "LLaMA-2 Chat (70B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "autopilot_pool": "cr-a100-80-2x", + "renamed": "meta-llama/Llama-2-70b-chat-hf", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e7934a589782acafe17823", + "name": "togethercomputer/CodeLlama-34b-Instruct", + "display_name": "Code Llama Instruct (34B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}" + }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "renamed": "codellama/CodeLlama-34b-Instruct-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e7934a589782acafe17821", + "name": "togethercomputer/CodeLlama-34b", + "display_name": "Code Llama (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "renamed": "codellama/CodeLlama-34b-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecf1", + "name": "Salesforce/codegen2-16B", + "display_name": "CodeGen2 (16B)", + "display_type": "code", + "description": "An autoregressive language models for program synthesis.", + "license": "", + "link": "", + "creator_organization": "Salesforce", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 16000000000, + "release_date": "2022-03-25T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["\n\n"], "chat_template_name": 
"gpt" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.453Z", + "update_at": "2023-06-23T20:22:44.453Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace476227f790586239cef", + "name": "Salesforce/codegen2-7B", + "display_name": "CodeGen2 (7B)", + "display_type": "code", + "description": "An autoregressive language models for program synthesis.", + "license": "", + "link": "", + "creator_organization": "Salesforce", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "release_date": "2022-03-25T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["\n\n"], "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:11:18.328Z", + "update_at": "2023-07-11T05:11:18.328Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc5", + "name": "google/flan-t5-xxl", + "display_name": "Flan T5 XXL (11B)", + "display_type": "language", + "description": "Flan T5 XXL (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).", + "creator_organization": "Google", + "hardware_label": "A40 48GB", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 512, + "config": { "chat_template_name": "default" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.261Z", + "update_at": "2023-09-01T14:35:00.161Z", + "license": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e9", + "name": "togethercomputer/llama-2-70b", + "display_name": "LLaMA-2 (70B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "autopilot_pool": "cr-a100-80-2x", + "renamed": "meta-llama/Llama-2-70b-hf", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425f", + "name": "codellama/CodeLlama-7b-hf", + "display_name": "Code Llama (7B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425c", + "name": "codellama/CodeLlama-13b-hf", + "display_name": "Code Llama (13B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-21T01:12:38.916Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78eba589782acafe17820", + "name": "togethercomputer/CodeLlama-13b-Instruct", + "display_name": "Code Llama Instruct (13B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-04T05:01:42.539Z", + "renamed": 
"codellama/CodeLlama-13b-Instruct-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e8", + "name": "togethercomputer/llama-2-13b-chat", + "display_name": "LLaMA-2 Chat (13B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:00:54.436Z", + "renamed": "meta-llama/Llama-2-13b-chat-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acefe5227f790586239d41", + "name": "lmsys/vicuna-13b-v1.3", + "display_name": "Vicuna v1.3 (13B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T06:00:05.166Z", + "update_at": "2023-07-15T03:08:44.173Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea0b227f790586239d0b", + "name": "huggyllama/llama-13b", + "display_name": "LLaMA (13B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:35:07.955Z", + "update_at": "2023-07-11T05:35:07.955Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acefbe227f790586239d40", + "name": "HuggingFaceH4/starchat-alpha", + "display_name": "StarCoderChat Alpha (16B)", + "display_type": "chat", + "description": "Fine-tuned from StarCoder to act as a helpful coding assistant. 
As an alpha release is only intended for educational or research purpopses.", + "license": "", + "link": "", + "creator_organization": "HuggingFaceH4", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 16000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 8192, + "config": { + "stop": ["<|endoftext|>", "<|end|>"], + "prompt_format": "<|system|>\n<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>", + "chat_template_name": "default" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:59:26.298Z", + "update_at": "2023-07-11T05:59:26.298Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea35227f790586239d0c", + "name": "huggyllama/llama-30b", + "display_name": "LLaMA (30B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "access": "open", + "num_parameters": 33000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-07-11T05:35:49.870Z", + "update_at": "2023-07-11T05:35:49.870Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecf4", + "name": "stabilityai/stablelm-base-alpha-3b", + "display_name": "StableLM-Base-Alpha (3B)", + "display_type": "language", + "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.", + "license": "", + "link": "", + "creator_organization": "Stability AI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.907Z", + "update_at": "2023-06-23T20:22:44.907Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1512907e072b8aecf5", + "name": "stabilityai/stablelm-base-alpha-7b", + "display_name": "StableLM-Base-Alpha (7B)", + "display_type": "language", + "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.", + "license": "", + "link": "", + "creator_organization": "Stability AI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:45.249Z", + "update_at": "2023-06-23T20:22:45.249Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78e89589782acafe1781c", + "name": "togethercomputer/CodeLlama-7b-Python", + "display_name": "Code Llama Python (7B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling 
capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "renamed": "codellama/CodeLlama-7b-Python-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f67987bc372ce719b97f07", + "name": "defog/sqlcoder", + "display_name": "Sqlcoder (15B)", + "display_type": "language", + "description": "Defog's SQLCoder is a state-of-the-art LLM for converting natural language questions to SQL queries, fine-tuned from Bigcode's Starcoder 15B model.", + "license": "", + "creator_organization": "Defog", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 15000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 8192, + "config": { + "stop": ["<|endoftext|>"], + "prompt_format": "### Instructions:\n\n{prompt}\n\n### Response:\n" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-05T00:42:47.496Z", + "update_at": "2023-09-05T00:42:47.496Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acef6e227f790586239d3f", + "name": "bigcode/starcoder", + "display_name": "StarCoder (16B)", + "display_type": "code", + "description": "Trained on 80+ coding languages, uses Multi Query Attention, an 8K context window, and was trained using the Fill-in-the-Middle objective on 1T tokens.", + "license": "", + "link": "", + "creator_organization": "BigCode", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "num_parameters": 16000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 8192, + "config": { "stop": ["<|endoftext|>", "<|end|>"] }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:58:06.486Z", + "update_at": "2023-07-11T05:58:06.486Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecb7", + "name": "databricks/dolly-v2-7b", + "display_name": "Dolly v2 (7B)", + "display_type": "chat", + "description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "license": "", + "link": "", + "creator_organization": "Databricks", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["### End"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.565Z", + "update_at": "2023-06-23T20:22:41.565Z", + 
"access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace8a3227f790586239d02", + "name": "togethercomputer/guanaco-33b", + "display_name": "Guanaco (33B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "A100 80GB", + "pricing_tier": "Supported", + "access": "open", + "num_parameters": 33000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-07-11T05:29:07.717Z", + "update_at": "2023-07-11T05:29:07.717Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace9b1227f790586239d07", + "name": "togethercomputer/Koala-13B", + "display_name": "Koala (13B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt} GPT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'GPT: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ 'GPT:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:33:37.737Z", + "update_at": "2023-07-11T05:33:37.737Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece2", + "name": "togethercomputer/GPT-JT-6B-v1", + "display_name": "GPT-JT (6B)", + "display_type": "language", + "description": "Fork of GPT-J instruction tuned to excel at few-shot prompts (blog post).", + "descriptionLink": "https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai", + "license": "", + "link": "", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 6700000000, + "release_date": "2022-11-29T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.617Z", + "update_at": "2023-06-23T20:22:43.617Z" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e6", + "name": "togethercomputer/llama-2-7b-chat", + "display_name": "LLaMA-2 Chat (7B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "renamed": "meta-llama/Llama-2-7b-chat-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "662b250e246deee9aefbcc50", + "name": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4", + "display_name": "Upstage SOLAR Instruct v1 (11B)-Int4", + "display_type": "chat", + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "license": "", + "creator_organization": "upstage", + "hardware_label": "A100B", + "pricing_tier": "Featured", + "num_parameters": 10700000000, + "release_date": "2023-12-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{{'<|im_start|>'}}{% if message['role'] == 'user' %}{{'user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'assistant\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'system' %}{{'system\n' + message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + }, + "pricing": { "input": 75, "output": 75 }, + "created_at": "2024-04-26T03:52:46.866Z", + "update_at": "2024-04-26T03:52:46.866Z", + "instances": [], + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace8ed227f790586239d04", + "name": "togethercomputer/guanaco-7b", + "display_name": "Guanaco (7B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "A40 48GB", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:30:21.531Z", + "update_at": "2023-07-11T05:30:21.531Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf7", + "name": "EleutherAI/llemma_7b", + "display_name": "Llemma (7B)", + "display_type": "language", + "description": "Llemma 7B is a language model for mathematics. 
It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 6738546688, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.403Z", + "update_at": "2023-10-24T17:42:38.630Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de96e620478cfa144262", + "name": "codellama/CodeLlama-34b-hf", + "display_name": "Code Llama (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "access": "", + "link": "", + "descriptionLink": "" + } +] diff --git a/src/libs/agent-runtime/togetherai/index.test.ts b/src/libs/agent-runtime/togetherai/index.test.ts index e13d62375ea2..ac2fad9ebdfc 100644 --- a/src/libs/agent-runtime/togetherai/index.test.ts +++ b/src/libs/agent-runtime/togetherai/index.test.ts @@ -2,9 +2,10 @@ import OpenAI from 'openai'; import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { ChatStreamCallbacks, LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; +import { LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; import * as debugStreamModule from '../utils/debugStream'; +import models from './fixtures/models.json'; import { LobeTogetherAI } from './index'; const provider = 'togetherai'; @@ -295,4 +296,17 @@ describe('LobeTogetherAI', () => { }); }); }); + + describe('models', () => { + it('should get models', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce({ + json: async () => models, + ok: true, + } as Response); + + const list = await instance.models(); + + expect(list).toMatchSnapshot(); + }); + }); }); diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts index 8f15e880c8c9..9fd320d9cacd 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts @@ -1,3 +1,4 @@ +import { OpenAI } from 'openai'; import { describe, expect, it } from 'vitest'; import { OpenAIChatMessage, UserMessageContentPart } from '../types/chat'; @@ -5,6 +6,7 @@ import { buildAnthropicBlock, buildAnthropicMessage, buildAnthropicMessages, + buildAnthropicTools, } from './anthropicHelpers'; import { parseDataUri } from './uriParser'; @@ -51,6 +53,86 @@ describe('anthropicHelpers', () => { const result = buildAnthropicMessage(message); expect(result).toEqual({ content: [{ type: 'text', text: 'Hello!' 
}], role: 'user' }); + + it('should correctly convert user message with string content', () => { + const message: OpenAIChatMessage = { + content: 'Hello!', + role: 'user', + }; + const result = buildAnthropicMessage(message); + expect(result).toEqual({ content: 'Hello!', role: 'user' }); + }); + + it('should correctly convert user message with content parts', () => { + const message: OpenAIChatMessage = { + content: [ + { type: 'text', text: 'Check out this image:' }, + { type: 'image_url', image_url: { url: 'data:image/png;base64,abc123' } }, + ], + role: 'user', + }; + const result = buildAnthropicMessage(message); + expect(result.role).toBe('user'); + expect(result.content).toHaveLength(2); + expect((result.content[1] as any).type).toBe('image'); + }); + + it('should correctly convert tool message', () => { + const message: OpenAIChatMessage = { + content: 'Tool result content', + role: 'tool', + tool_call_id: 'tool123', + }; + const result = buildAnthropicMessage(message); + expect(result.role).toBe('user'); + expect(result.content).toEqual([ + { + content: 'Tool result content', + tool_use_id: 'tool123', + type: 'tool_result', + }, + ]); + }); + + it('should correctly convert assistant message with tool calls', () => { + const message: OpenAIChatMessage = { + content: 'Here is the result:', + role: 'assistant', + tool_calls: [ + { + id: 'call1', + type: 'function', + function: { + name: 'search', + arguments: '{"query":"anthropic"}', + }, + }, + ], + }; + const result = buildAnthropicMessage(message); + expect(result.role).toBe('assistant'); + expect(result.content).toEqual([ + { text: 'Here is the result:', type: 'text' }, + { + id: 'call1', + input: { query: 'anthropic' }, + name: 'search', + type: 'tool_use', + }, + ]); + }); + + it('should correctly convert function message', () => { + const message: OpenAIChatMessage = { + content: 'def hello(name):\n return f"Hello {name}"', + role: 'function', + }; + const result = buildAnthropicMessage(message); + expect(result).toEqual({ + content: 'def hello(name):\n return f"Hello {name}"', + role: 'assistant', + }); + }); }); describe('buildAnthropicMessages', () => { @@ -111,4 +193,41 @@ describe('anthropicHelpers', () => { ]); }); }); + + describe('buildAnthropicTools', () => { + it('should correctly convert OpenAI tools to Anthropic format', () => { + const tools: OpenAI.ChatCompletionTool[] = [ + { + type: 'function', + function: { + name: 'search', + description: 'Searches the web', + parameters: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + required: ['query'], + }, + }, + }, + ]; + + const result = buildAnthropicTools(tools); + + expect(result).toEqual([ + { + name: 'search', + description: 'Searches the web', + input_schema: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + required: ['query'], + }, + }, + ]); + }); + }); }); diff --git a/src/libs/agent-runtime/utils/debugStream.test.ts b/src/libs/agent-runtime/utils/debugStream.test.ts new file mode 100644 index 000000000000..1b98a3fcb317 --- /dev/null +++ b/src/libs/agent-runtime/utils/debugStream.test.ts @@ -0,0 +1,70 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { debugStream } from './debugStream'; + +describe('debugStream', () => { + let consoleLogSpy: ReturnType<typeof vi.spyOn>; + let consoleErrorSpy: ReturnType<typeof vi.spyOn>; + + beforeEach(() => { + consoleLogSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + }); + +
afterEach(() => { + consoleLogSpy.mockRestore(); + consoleErrorSpy.mockRestore(); + }); + + it('should log stream start and end messages', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue('test chunk'); + controller.close(); + }, + }); + + await debugStream(stream); + + expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringMatching(/^\[stream start\]/)); + }); + + it('should handle and log stream errors', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue('test chunk'); + }, + }); + + await debugStream(stream); + + expect(consoleErrorSpy).toHaveBeenCalledWith('[debugStream error]', expect.any(Error)); + expect(consoleErrorSpy).toHaveBeenCalledWith('[error chunk value:]', 'test chunk'); + }); + + it('should decode ArrayBuffer chunk values', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('test chunk')); + controller.close(); + }, + }); + + await debugStream(stream); + + expect(consoleLogSpy).toHaveBeenCalledWith('test chunk'); + }); + + it('should stringify non-string chunk values', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue({ test: 'chunk' }); + controller.close(); + }, + }); + + await debugStream(stream); + + expect(consoleLogSpy).toHaveBeenCalledWith('{"test":"chunk"}'); + }); +}); diff --git a/src/libs/agent-runtime/utils/streams/anthropic.test.ts b/src/libs/agent-runtime/utils/streams/anthropic.test.ts new file mode 100644 index 000000000000..2dffff7c34f4 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/anthropic.test.ts @@ -0,0 +1,197 @@ +import type { Stream } from '@anthropic-ai/sdk/streaming'; +import { describe, expect, it, vi } from 'vitest'; + +import { AnthropicStream } from './anthropic'; + +describe('AnthropicStream', () => { + it('should transform Anthropic stream to protocol stream', async () => { + // @ts-ignore + const mockAnthropicStream: Stream = { + [Symbol.asyncIterator]() { + let count = 0; + return { + next: async () => { + switch (count) { + case 0: + count++; + return { + done: false, + value: { + type: 'message_start', + message: { id: 'message_1', metadata: {} }, + }, + }; + case 1: + count++; + return { + done: false, + value: { + type: 'content_block_delta', + delta: { type: 'text_delta', text: 'Hello' }, + }, + }; + case 2: + count++; + return { + done: false, + value: { + type: 'content_block_delta', + delta: { type: 'text_delta', text: ' world!' 
}, + }, + }; + case 3: + count++; + return { + done: false, + value: { + type: 'message_delta', + delta: { stop_reason: 'stop' }, + }, + }; + default: + return { done: true, value: undefined }; + } + }, + }; + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AnthropicStream(mockAnthropicStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: message_1\n', + 'event: data\n', + `data: {"id":"message_1","metadata":{}}\n\n`, + 'id: message_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: message_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: message_1\n', + 'event: stop\n', + `data: "stop"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle tool use event and ReadableStream input', async () => { + const toolUseEvent = { + type: 'content_block_delta', + delta: { + type: 'tool_use', + tool_use: { + id: 'tool_use_1', + name: 'example_tool', + input: { arg1: 'value1' }, + }, + }, + }; + + const mockReadableStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + type: 'message_start', + message: { id: 'message_1', metadata: {} }, + }); + controller.enqueue(toolUseEvent); + controller.enqueue({ + type: 'message_stop', + }); + controller.close(); + }, + }); + + const onToolCallMock = vi.fn(); + + const protocolStream = AnthropicStream(mockReadableStream, { + onToolCall: onToolCallMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: message_1\n', + 'event: data\n', + `data: {"id":"message_1","metadata":{}}\n\n`, + 'id: message_1\n', + 'event: tool_calls\n', + `data: [{"function":{"arguments":"{\\"arg1\\":\\"value1\\"}","name":"example_tool"},"id":"tool_use_1","index":0,"type":"function"}]\n\n`, + 'id: message_1\n', + 'event: stop\n', + `data: "message_stop"\n\n`, + ]); + + expect(onToolCallMock).toHaveBeenCalledTimes(1); + }); + + it('should handle ReadableStream input', async () => { + const mockReadableStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + type: 'message_start', + message: { id: 'message_1', metadata: {} }, + }); + controller.enqueue({ + type: 'content_block_delta', + delta: { type: 'text_delta', text: 'Hello' }, + }); + controller.enqueue({ + type: 'message_stop', + }); + controller.close(); + }, + }); + + const protocolStream = AnthropicStream(mockReadableStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: message_1\n', + 'event: data\n', + `data: {"id":"message_1","metadata":{}}\n\n`, + 'id: message_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: message_1\n', + 'event: stop\n', + `data: "message_stop"\n\n`, + ]); + 
}); +}); diff --git a/src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts b/src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts new file mode 100644 index 000000000000..2032faf73ce3 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts @@ -0,0 +1,196 @@ +import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; +import { Readable } from 'stream'; +import { describe, expect, it, vi } from 'vitest'; + +import * as uuidModule from '@/utils/uuid'; + +import { AWSBedrockLlamaStream } from './llama'; + +describe('AWSBedrockLlamaStream', () => { + it('should transform Bedrock Llama stream to protocol stream', async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1'); + const mockBedrockStream = new ReadableStream({ + start(controller) { + controller.enqueue({ generation: 'Hello', generation_token_count: 1 }); + controller.enqueue({ generation: ' world!', generation_token_count: 2 }); + controller.enqueue({ stop_reason: 'stop' }); + controller.close(); + }, + }); + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_1\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should transform Bedrock Llama AsyncIterator to protocol stream', async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1'); + + const mockBedrockStream: InvokeModelWithResponseStreamResponse = { + body: { + // @ts-ignore + async *[Symbol.asyncIterator]() { + yield { generation: 'Hello', generation_token_count: 1 }; + yield { generation: ' world!', generation_token_count: 2 }; + yield { stop_reason: 'stop' }; + }, + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_1\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle Bedrock response with chunk property', 
async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('2'); + + const mockBedrockStream: InvokeModelWithResponseStreamResponse = { + contentType: 'any', + body: { + // @ts-ignore + async *[Symbol.asyncIterator]() { + yield { + chunk: { + bytes: new TextEncoder().encode('{"generation":"Hello","generation_token_count":1}'), + }, + }; + yield { + chunk: { + bytes: new TextEncoder().encode( + '{"generation":" world!","generation_token_count":2}', + ), + }, + }; + yield { chunk: { bytes: new TextEncoder().encode('{"stop_reason":"stop"}') } }; + }, + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_2\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_2\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_2\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle empty stream', async () => { + const mockBedrockStream = new ReadableStream({ + start(controller) { + controller.close(); + }, + }); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([]); + }); +}); diff --git a/src/libs/agent-runtime/utils/streams/google-ai.test.ts b/src/libs/agent-runtime/utils/streams/google-ai.test.ts new file mode 100644 index 000000000000..7477f86502a2 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/google-ai.test.ts @@ -0,0 +1,97 @@ +import { EnhancedGenerateContentResponse } from '@google/generative-ai'; +import { describe, expect, it, vi } from 'vitest'; + +import * as uuidModule from '@/utils/uuid'; + +import { GoogleGenerativeAIStream } from './google-ai'; + +describe('GoogleGenerativeAIStream', () => { + it('should transform Google Generative AI stream to protocol stream', async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1'); + + const mockGenerateContentResponse = (text: string, functionCalls?: any[]) => + ({ + text: () => text, + functionCall: () => functionCalls?.[0], + functionCalls: () => functionCalls, + }) as EnhancedGenerateContentResponse; + + const mockGoogleStream = new ReadableStream({ + start(controller) { + controller.enqueue(mockGenerateContentResponse('Hello')); + + controller.enqueue( + mockGenerateContentResponse('', [{ name: 'testFunction', args: { arg1: 'value1' } }]), + ); + controller.enqueue(mockGenerateContentResponse(' world!')); + controller.close(); + }, + }); + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onToolCallMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = GoogleGenerativeAIStream(mockGoogleStream, { + onStart: onStartMock, + onText: 
onTextMock,
+      onToken: onTokenMock,
+      onToolCall: onToolCallMock,
+      onCompletion: onCompletionMock,
+    });
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([
+      // text
+      'id: chat_1\n',
+      'event: text\n',
+      `data: "Hello"\n\n`,
+
+      // tool call
+      'id: chat_1\n',
+      'event: tool_calls\n',
+      `data: [{"function":{"arguments":"{\\"arg1\\":\\"value1\\"}","name":"testFunction"},"id":"testFunction_0","index":0,"type":"function"}]\n\n`,
+
+      // text
+      'id: chat_1\n',
+      'event: text\n',
+      `data: " world!"\n\n`,
+    ]);
+
+    expect(onStartMock).toHaveBeenCalledTimes(1);
+    expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"');
+    expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"');
+    expect(onTokenMock).toHaveBeenCalledTimes(2);
+    expect(onToolCallMock).toHaveBeenCalledTimes(1);
+    expect(onCompletionMock).toHaveBeenCalledTimes(1);
+  });
+
+  it('should handle empty stream', async () => {
+    const mockGoogleStream = new ReadableStream({
+      start(controller) {
+        controller.close();
+      },
+    });
+
+    const protocolStream = GoogleGenerativeAIStream(mockGoogleStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([]);
+  });
+});
diff --git a/src/libs/agent-runtime/utils/streams/ollama.test.ts b/src/libs/agent-runtime/utils/streams/ollama.test.ts
new file mode 100644
index 000000000000..08202fe78011
--- /dev/null
+++ b/src/libs/agent-runtime/utils/streams/ollama.test.ts
@@ -0,0 +1,77 @@
+import { ChatResponse } from 'ollama/browser';
+import { describe, expect, it, vi } from 'vitest';
+
+import * as uuidModule from '@/utils/uuid';
+
+import { OllamaStream } from './ollama';
+
+describe('OllamaStream', () => {
+  it('should transform Ollama stream to protocol stream', async () => {
+    vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+    const mockOllamaStream: AsyncIterable<ChatResponse> = {
+      // @ts-ignore
+      async *[Symbol.asyncIterator]() {
+        yield { message: { content: 'Hello' }, done: false };
+        yield { message: { content: ' world!' 
}, done: false }; + yield { message: { content: '' }, done: true }; + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = OllamaStream(mockOllamaStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_1\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle empty stream', async () => { + const mockOllamaStream = { + async *[Symbol.asyncIterator]() {}, + }; + + const protocolStream = OllamaStream(mockOllamaStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([]); + }); +}); diff --git a/src/libs/agent-runtime/utils/streams/openai.test.ts b/src/libs/agent-runtime/utils/streams/openai.test.ts new file mode 100644 index 000000000000..203775d86d59 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/openai.test.ts @@ -0,0 +1,263 @@ +import { describe, expect, it, vi } from 'vitest'; + +import { OpenAIStream } from './openai'; + +describe('OpenAIStream', () => { + it('should transform OpenAI stream to protocol stream', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { content: 'Hello' }, + index: 0, + }, + ], + id: '1', + }); + controller.enqueue({ + choices: [ + { + delta: { content: ' world!' 
}, + index: 1, + }, + ], + id: '1', + }); + controller.enqueue({ + choices: [ + { + delta: null, + finish_reason: 'stop', + index: 2, + }, + ], + id: '1', + }); + + controller.close(); + }, + }); + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = OpenAIStream(mockOpenAIStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: 1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: 1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: 1\n', + 'event: stop\n', + `data: "stop"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle tool calls', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { + tool_calls: [ + { + function: { name: 'tool1', arguments: '{}' }, + id: 'call_1', + index: 0, + type: 'function', + }, + { + function: { name: 'tool2', arguments: '{}' }, + id: 'call_2', + index: 1, + }, + ], + }, + index: 0, + }, + ], + id: '2', + }); + + controller.close(); + }, + }); + + const onToolCallMock = vi.fn(); + + const protocolStream = OpenAIStream(mockOpenAIStream, { + onToolCall: onToolCallMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: 2\n', + 'event: tool_calls\n', + `data: [{"function":{"name":"tool1","arguments":"{}"},"id":"call_1","index":0,"type":"function"},{"function":{"name":"tool2","arguments":"{}"},"id":"call_2","index":1,"type":"function"}]\n\n`, + ]); + + expect(onToolCallMock).toHaveBeenCalledTimes(1); + }); + + it('should handle empty stream', async () => { + const mockStream = new ReadableStream({ + start(controller) { + controller.close(); + }, + }); + + const protocolStream = OpenAIStream(mockStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([]); + }); + + it('should handle delta content null', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { content: null }, + index: 0, + }, + ], + id: '3', + }); + + controller.close(); + }, + }); + + const protocolStream = OpenAIStream(mockOpenAIStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual(['id: 3\n', 'event: data\n', `data: {"content":null}\n\n`]); + }); + + it('should handle other delta data', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { custom_field: 'custom_value' }, + index: 0, + }, + ], + id: '4', + }); + + 
controller.close();
+      },
+    });
+
+    const protocolStream = OpenAIStream(mockOpenAIStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([
+      'id: 4\n',
+      'event: data\n',
+      `data: {"delta":{"custom_field":"custom_value"},"id":"4","index":0}\n\n`,
+    ]);
+  });
+
+  it('should handle tool calls without index and type', async () => {
+    const mockOpenAIStream = new ReadableStream({
+      start(controller) {
+        controller.enqueue({
+          choices: [
+            {
+              delta: {
+                tool_calls: [
+                  {
+                    function: { name: 'tool1', arguments: '{}' },
+                    id: 'call_1',
+                  },
+                  {
+                    function: { name: 'tool2', arguments: '{}' },
+                    id: 'call_2',
+                  },
+                ],
+              },
+              index: 0,
+            },
+          ],
+          id: '5',
+        });
+
+        controller.close();
+      },
+    });
+
+    const protocolStream = OpenAIStream(mockOpenAIStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([
+      'id: 5\n',
+      'event: tool_calls\n',
+      `data: [{"function":{"name":"tool1","arguments":"{}"},"id":"call_1","index":0,"type":"function"},{"function":{"name":"tool2","arguments":"{}"},"id":"call_2","index":1,"type":"function"}]\n\n`,
+    ]);
+  });
+});
diff --git a/src/store/tool/slices/builtin/action.test.ts b/src/store/tool/slices/builtin/action.test.ts
new file mode 100644
index 000000000000..b953ec480fb9
--- /dev/null
+++ b/src/store/tool/slices/builtin/action.test.ts
@@ -0,0 +1,90 @@
+import { act, renderHook } from '@testing-library/react';
+import { describe, expect, it, vi } from 'vitest';
+
+import { useToolStore } from '../../store';
+
+vi.mock('zustand/traditional');
+
+describe('createBuiltinToolSlice', () => {
+  describe('invokeBuiltinTool', () => {
+    it('should invoke the tool action and return undefined when it resolves without a result', async () => {
+      // Given
+      const key = 'text2image';
+      const params = {};
+
+      const mockFn = vi.fn();
+      const { result } = renderHook(() => useToolStore());
+
+      act(() => {
+        useToolStore.setState({
+          text2image: mockFn,
+        });
+      });
+
+      await act(async () => {
+        // When
+        const data = await result.current.invokeBuiltinTool(key, params);
+        expect(data).toBeUndefined();
+      });
+
+      // Then
+      expect(mockFn).toHaveBeenCalled();
+    });
+
+    it('should invoke the specified tool action and return the stringified result', async () => {
+      // Given
+      const key = 'text2image';
+
+      const mockFn = vi.fn();
+      const { result } = renderHook(() => useToolStore());
+
+      const params = {
+        prompts: ['test prompt'],
+        size: '512x512',
+        quality: 'standard',
+        style: 'vivid',
+      };
+
+      act(() => {
+        useToolStore.setState({
+          builtinToolLoading: { [key]: false },
+          text2image: mockFn,
+        });
+      });
+      // When
+      await act(async () => {
+        await result.current.invokeBuiltinTool(key, params);
+      });
+
+      expect(mockFn).toBeCalledWith({
+        prompts: ['test prompt'],
+        quality: 'standard',
+        size: '512x512',
+        style: 'vivid',
+      });
+    });
+  });
+
+  describe('text2image', () => {
+    it('should map the prompts to DallEImageItem objects', () => {
+      // When
+      const { result } = renderHook(() => useToolStore());
+
+      const data = result.current.text2image(
+        {
+          prompts: ['prompt1', 'prompt2'],
+          size: '1024x1024',
+          quality: 'standard',
+          style: 'vivid',
+        },
+        'a',
+      );
+
+      // Then
+      expect(data).toEqual([
+        { prompt: 'prompt1', quality: 'standard', size: '1024x1024', style: 'vivid' },
+        { prompt: 'prompt2', quality: 
'standard', size: '1024x1024', style: 'vivid' }, + ]); + }); + }); +}); diff --git a/src/utils/fetch.test.ts b/src/utils/fetch.test.ts index 954c08551987..e70151340d32 100644 --- a/src/utils/fetch.test.ts +++ b/src/utils/fetch.test.ts @@ -1,8 +1,10 @@ +import { fetchEventSource } from '@microsoft/fetch-event-source'; +import { FetchEventSourceInit } from '@microsoft/fetch-event-source'; import { afterEach, describe, expect, it, vi } from 'vitest'; import { ErrorResponse } from '@/types/fetch'; -import { getMessageError, parseToolCalls } from './fetch'; +import { fetchSSE, getMessageError, parseToolCalls } from './fetch'; // 模拟 i18next vi.mock('i18next', () => ({ @@ -39,6 +41,10 @@ const createMockResponse = (body: any, ok: boolean, status: number = 200) => ({ }, }); +vi.mock('@microsoft/fetch-event-source', () => ({ + fetchEventSource: vi.fn(), +})); + // 在每次测试后清理所有模拟 afterEach(() => { vi.restoreAllMocks(); @@ -168,3 +174,150 @@ describe('parseToolCalls', () => { ]); }); }); + +describe('fetchSSE', () => { + it('should handle text event correctly', async () => { + const mockOnMessageHandle = vi.fn(); + const mockOnFinish = vi.fn(); + + (fetchEventSource as any).mockImplementationOnce( + (url: string, options: FetchEventSourceInit) => { + options.onopen!({ clone: () => ({ ok: true, headers: new Headers() }) } as any); + options.onmessage!({ event: 'text', data: JSON.stringify('Hello') } as any); + options.onmessage!({ event: 'text', data: JSON.stringify(' World') } as any); + }, + ); + + await fetchSSE('/', { onMessageHandle: mockOnMessageHandle, onFinish: mockOnFinish }); + + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(1, { text: 'Hello', type: 'text' }); + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(2, { text: ' World', type: 'text' }); + expect(mockOnFinish).toHaveBeenCalledWith('Hello World', { + observationId: null, + toolCalls: undefined, + traceId: null, + type: 'done', + }); + }); + + it('should handle tool_calls event correctly', async () => { + const mockOnMessageHandle = vi.fn(); + const mockOnFinish = vi.fn(); + + (fetchEventSource as any).mockImplementationOnce( + (url: string, options: FetchEventSourceInit) => { + options.onopen!({ clone: () => ({ ok: true, headers: new Headers() }) } as any); + options.onmessage!({ + event: 'tool_calls', + data: JSON.stringify([ + { index: 0, id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }, + ]), + } as any); + options.onmessage!({ + event: 'tool_calls', + data: JSON.stringify([ + { index: 1, id: '2', type: 'function', function: { name: 'func2', arguments: 'arg2' } }, + ]), + } as any); + }, + ); + + await fetchSSE('/', { onMessageHandle: mockOnMessageHandle, onFinish: mockOnFinish }); + + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(1, { + tool_calls: [{ id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }], + type: 'tool_calls', + }); + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(2, { + tool_calls: [ + { id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }, + { id: '2', type: 'function', function: { name: 'func2', arguments: 'arg2' } }, + ], + type: 'tool_calls', + }); + expect(mockOnFinish).toHaveBeenCalledWith('', { + observationId: null, + toolCalls: [ + { id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }, + { id: '2', type: 'function', function: { name: 'func2', arguments: 'arg2' } }, + ], + traceId: null, + type: 'done', + }); + }); + + it('should call onAbort when AbortError is thrown', async () => { + 
const mockOnAbort = vi.fn();
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        options.onmessage!({ event: 'text', data: JSON.stringify('Hello') } as any);
+        options.onerror!({ name: 'AbortError' });
+      },
+    );
+
+    await fetchSSE('/', { onAbort: mockOnAbort });
+
+    expect(mockOnAbort).toHaveBeenCalledWith('Hello');
+  });
+
+  it('should not call onErrorHandle when a non-abort error is thrown', async () => {
+    const mockOnErrorHandle = vi.fn();
+    const mockError = new Error('Unknown error');
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        options.onerror!(mockError);
+      },
+    );
+
+    await fetchSSE('/', { onErrorHandle: mockOnErrorHandle });
+
+    expect(mockOnErrorHandle).not.toHaveBeenCalled();
+  });
+
+  it('should call onErrorHandle when response is not ok', async () => {
+    const mockOnErrorHandle = vi.fn();
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        const res = new Response(JSON.stringify({ errorType: 'SomeError' }), {
+          status: 400,
+          statusText: 'Error',
+        });
+
+        options.onopen!(res as any);
+      },
+    );
+
+    await fetchSSE('/', { onErrorHandle: mockOnErrorHandle });
+
+    expect(mockOnErrorHandle).toHaveBeenCalledWith({
+      body: undefined,
+      message: 'translated_response.SomeError',
+      type: 'SomeError',
+    });
+  });
+
+  it('should call onMessageHandle with full text if no message event', async () => {
+    const mockOnMessageHandle = vi.fn();
+    const mockOnFinish = vi.fn();
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        const res = new Response('Hello World', { status: 200, statusText: 'OK' });
+        options.onopen!(res as any);
+      },
+    );
+
+    await fetchSSE('/', { onMessageHandle: mockOnMessageHandle, onFinish: mockOnFinish });
+
+    expect(mockOnMessageHandle).toHaveBeenCalledWith({ text: 'Hello World', type: 'text' });
+    expect(mockOnFinish).toHaveBeenCalledWith('Hello World', {
+      observationId: null,
+      toolCalls: undefined,
+      traceId: null,
+      type: 'done',
+    });
+  });
+});
diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts
index d89cae89a9c9..bc2d70aa16ed 100644
--- a/src/utils/fetch.ts
+++ b/src/utils/fetch.ts
@@ -162,7 +162,8 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
   if (response) {
     // if there is no onMessageHandler, we should call onHandleMessage first
     if (!triggerOnMessageHandler) {
-      options.onMessageHandle?.({ text: await response.clone().text(), type: 'text' });
+      output = await response.clone().text();
+      options.onMessageHandle?.({ text: output, type: 'text' });
     }
 
     const traceId = response.headers.get(LOBE_CHAT_TRACE_ID);

From 5e5add82780e49405814c52abfc3dbc339fedd2f Mon Sep 17 00:00:00 2001
From: arvinxx
Date: Sat, 11 May 2024 22:00:54 +0800
Subject: [PATCH 22/24] =?UTF-8?q?=E2=9C=85=20test:=20add=20test?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/store/chat/slices/plugin/action.test.ts | 345 ++++++++------------
 1 file changed, 138 insertions(+), 207 deletions(-)

diff --git a/src/store/chat/slices/plugin/action.test.ts b/src/store/chat/slices/plugin/action.test.ts
index 627bf70286bb..199c80909127 100644
--- a/src/store/chat/slices/plugin/action.test.ts
+++ b/src/store/chat/slices/plugin/action.test.ts
@@ -2,6 +2,7 @@ import { act, renderHook } from '@testing-library/react';
 import { Md5 } from 'ts-md5';
 import { Mock, afterEach, describe, expect, it, vi } from 
'vitest'; +import { LOADING_FLAT } from '@/const/message'; import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; import { chatService } from '@/services/chat'; import { messageService } from '@/services/message'; @@ -174,248 +175,178 @@ describe('ChatPluginAction', () => { }); }); - describe.skip('triggerToolCalls', () => { - it('should trigger a function call and update the plugin message accordingly', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ + describe('triggerToolCalls', () => { + it('should trigger tool calls for the assistant message', async () => { + const assistantId = 'assistant-id'; + const message = { + id: assistantId, + role: 'assistant', + content: 'Assistant message', + tools: [ { - id: 'call_sbca', - type: 'function', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName`, - arguments: { key: 'value' }, - }, + id: 'tool1', + type: 'standalone', + identifier: 'plugin1', + apiName: 'api1', + arguments: '{}', }, - ], - }); - const messagePluginPayload = { - apiName: 'apiName', - identifier: 'pluginName', - type: 'default', - arguments: { key: 'value' }, - }; - - const refreshSpy = vi.spyOn(useChatStore.getState(), 'refreshMessages'); - const invokeSpy = vi.spyOn(useChatStore.getState(), 'invokeDefaultTypePlugin'); - - const { result } = renderHook(() => useChatStore()); - - await act(async () => { - useChatStore.setState({ - runPluginApi: vi.fn(), - messages: [ - { - id: messageId, - content: messageContent, - tools: [ - { - id: 'call_sbca', - type: 'default', - identifier: 'pluginName', - apiName: 'apiName', - arguments: "{ key: 'value' }", - }, - ], - } as ChatMessage, - ], - }); - - await result.current.triggerToolCalls(messageId); - }); - - expect(messageService.updateMessage).toHaveBeenCalledWith(messageId, { - content: '', - plugin: messagePluginPayload, - role: 'function', - }); - expect(refreshSpy).toHaveBeenCalled(); - expect(invokeSpy).toHaveBeenCalledWith(messageId, messagePluginPayload); - }); - - it('should handle function call with MD5 prefixed API name', async () => { - const messageId = 'message-id'; - const apiName = 'originalApiName'; - const id = 'pluginIdentifier'; - const md5ApiName = PLUGIN_SCHEMA_API_MD5_PREFIX + Md5.hashStr(apiName).toString(); - const messageContent = JSON.stringify({ - tool_calls: [ { - id: 'call_sbca', - type: 'function', - function: { - name: id + PLUGIN_SCHEMA_SEPARATOR + md5ApiName, - arguments: {}, - }, + id: 'tool2', + type: 'markdown', + identifier: 'plugin2', + apiName: 'api2', + arguments: '{}', + }, + { + id: 'tool3', + type: 'builtin', + identifier: 'builtin1', + apiName: 'api3', + arguments: '{}', }, - ], - }); - - const plugin = { identifier: id, manifest: { api: [{ name: apiName }] } } as LobeTool; - - useToolStore.setState({ installedPlugins: [plugin] }); - - vi.spyOn(chatSelectors, 'getMessageById').mockImplementationOnce( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); - - const { result } = renderHook(() => useChatStore()); - vi.spyOn(result.current, 'invokeDefaultTypePlugin'); - vi.spyOn(result.current, 'refreshMessages'); - - await act(async () => { - await result.current.triggerToolCalls(messageId); - }); - expect(result.current.refreshMessages).toHaveBeenCalled(); - - expect(messageService.updateMessage).toHaveBeenCalledWith( - messageId, - expect.objectContaining({ - // 确保正确的 API 名称被设置 - plugin: expect.objectContaining({ apiName }), - }), - ); - 
expect(result.current.invokeDefaultTypePlugin).toHaveBeenCalledWith( - messageId, - expect.objectContaining({ - apiName: apiName, - }), - ); - }); - - it('should handle standalone plugin type', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ { - id: 'call_scv', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName${PLUGIN_SCHEMA_SEPARATOR}standalone`, - arguments: {}, - }, + id: 'tool4', + type: 'default', + identifier: 'plugin3', + apiName: 'api4', + arguments: '{}', }, ], - }); + } as ChatMessage; - const invokeStandaloneTypePlugin = useChatStore.getState().invokeStandaloneTypePlugin; + const invokeStandaloneTypePluginMock = vi.fn(); + const invokeMarkdownTypePluginMock = vi.fn(); + const invokeBuiltinToolMock = vi.fn(); + const invokeDefaultTypePluginMock = vi.fn().mockResolvedValue('Default tool response'); + const triggerAIMessageMock = vi.fn(); + const internal_createMessageMock = vi.fn().mockResolvedValue('tool-message-id'); + const getTraceIdByMessageIdMock = vi.fn().mockReturnValue('trace-id'); act(() => { useChatStore.setState({ - refreshMessages: vi.fn(), - invokeStandaloneTypePlugin: vi.fn(), + messages: [message], + invokeStandaloneTypePlugin: invokeStandaloneTypePluginMock, + invokeMarkdownTypePlugin: invokeMarkdownTypePluginMock, + invokeBuiltinTool: invokeBuiltinToolMock, + invokeDefaultTypePlugin: invokeDefaultTypePluginMock, + triggerAIMessage: triggerAIMessageMock, + internal_createMessage: internal_createMessageMock, + activeId: 'session-id', + activeTopicId: 'topic-id', }); }); - vi.spyOn(chatSelectors, 'getMessageById').mockImplementation( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); - const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerToolCalls(messageId); - }); - - // 验证 refreshMessages 是否被调用 - expect(result.current.refreshMessages).toHaveBeenCalled(); - - // 验证 invokeDefaultTypePlugin 是否没有被调用,因为类型是 standalone - expect(result.current.invokeDefaultTypePlugin).not.toHaveBeenCalled(); - expect(result.current.invokeStandaloneTypePlugin).toHaveBeenCalled(); + await result.current.triggerToolCalls(assistantId); + }); + + // Verify that tool messages were created for each tool call + expect(internal_createMessageMock).toHaveBeenCalledTimes(4); + expect(internal_createMessageMock).toHaveBeenCalledWith({ + content: LOADING_FLAT, + parentId: assistantId, + plugin: message.tools![0], + role: 'tool', + sessionId: 'session-id', + tool_call_id: 'tool1', + topicId: 'topic-id', + }); + // ... 
similar assertions for other tool calls + + // Verify that the appropriate plugin types were invoked + expect(invokeStandaloneTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![0], + ); + expect(invokeMarkdownTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![1], + ); + expect(invokeBuiltinToolMock).toHaveBeenCalledWith('tool-message-id', message.tools![2]); + expect(invokeDefaultTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![3], + ); - useChatStore.setState({ invokeStandaloneTypePlugin }); + // Verify that AI message was triggered for default type tool call + // expect(getTraceIdByMessageIdMock).toHaveBeenCalledWith('tool-message-id'); + // expect(triggerAIMessageMock).toHaveBeenCalledWith({ traceId: 'trace-id' }); }); - it('should handle builtin plugin type', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ + it('should not trigger AI message if no default type tool calls', async () => { + const assistantId = 'assistant-id'; + const message = { + id: assistantId, + role: 'assistant', + content: 'Assistant message', + tools: [ { - id: 'call_scv', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName${PLUGIN_SCHEMA_SEPARATOR}builtin`, - arguments: {}, - }, + id: 'tool1', + type: 'standalone', + identifier: 'plugin1', + apiName: 'api1', + arguments: '{}', + }, + { + id: 'tool2', + type: 'markdown', + identifier: 'plugin2', + apiName: 'api2', + arguments: '{}', }, - ], - }); - - const invokeBuiltinTool = useChatStore.getState().invokeBuiltinTool; - useChatStore.setState({ refreshMessages: vi.fn(), invokeBuiltinTool: vi.fn() }); - - vi.spyOn(chatSelectors, 'getMessageById').mockImplementation( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); - - const { result } = renderHook(() => useChatStore()); - - await act(async () => { - await result.current.triggerToolCalls(messageId); - }); - - // 验证 refreshMessages 是否被调用 - expect(result.current.refreshMessages).toHaveBeenCalled(); - - // 验证 invokeDefaultTypePlugin 是否没有被调用,因为类型是 standalone - expect(result.current.invokeDefaultTypePlugin).not.toHaveBeenCalled(); - expect(result.current.invokeBuiltinTool).toHaveBeenCalled(); - - useChatStore.setState({ invokeBuiltinTool }); - }); - - it('should handle markdown plugin type', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ { - id: 'call_scv', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName${PLUGIN_SCHEMA_SEPARATOR}markdown`, - arguments: {}, - }, + id: 'tool3', + type: 'builtin', + identifier: 'builtin1', + apiName: 'api3', + arguments: '{}', }, ], - }); + } as ChatMessage; - const invokeMarkdownTypePlugin = useChatStore.getState().invokeMarkdownTypePlugin; - useChatStore.setState({ - refreshMessages: vi.fn(), - invokeMarkdownTypePlugin: vi.fn(), - }); + const invokeStandaloneTypePluginMock = vi.fn(); + const invokeMarkdownTypePluginMock = vi.fn(); + const invokeBuiltinToolMock = vi.fn(); + const triggerAIMessageMock = vi.fn(); + const internal_createMessageMock = vi.fn().mockResolvedValue('tool-message-id'); - vi.spyOn(chatSelectors, 'getMessageById').mockImplementation( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); + act(() => { + useChatStore.setState({ + invokeStandaloneTypePlugin: invokeStandaloneTypePluginMock, + invokeMarkdownTypePlugin: invokeMarkdownTypePluginMock, + invokeBuiltinTool: invokeBuiltinToolMock, 
+ triggerAIMessage: triggerAIMessageMock, + internal_createMessage: internal_createMessageMock, + activeId: 'session-id', + messages: [message], + activeTopicId: 'topic-id', + }); + }); const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerToolCalls(messageId); + await result.current.triggerToolCalls(assistantId); }); - // 验证 refreshMessages 是否被调用 - expect(result.current.refreshMessages).toHaveBeenCalled(); + // Verify that tool messages were created for each tool call + expect(internal_createMessageMock).toHaveBeenCalledTimes(3); - expect(result.current.invokeDefaultTypePlugin).not.toHaveBeenCalled(); - expect(result.current.invokeMarkdownTypePlugin).toHaveBeenCalled(); + // Verify that the appropriate plugin types were invoked + expect(invokeStandaloneTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![0], + ); + expect(invokeMarkdownTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![1], + ); + expect(invokeBuiltinToolMock).toHaveBeenCalledWith('tool-message-id', message.tools![2]); - useChatStore.setState({ invokeMarkdownTypePlugin }); + // Verify that AI message was not triggered + expect(triggerAIMessageMock).not.toHaveBeenCalled(); }); }); From 0677a51f507e4ff25aac7cbab491c56f5f000262 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Sat, 11 May 2024 22:47:25 +0800 Subject: [PATCH 23/24] =?UTF-8?q?=F0=9F=8E=A8=20chore:=20improve=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/google/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index cf4c0689ac9b..708b39e16341 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -271,8 +271,6 @@ export class LobeGoogleAI implements LobeRuntimeAI { }; private convertSchemaObject(schema: JSONSchema7): FunctionDeclarationSchemaProperty { - console.log('input:', schema); - switch (schema.type) { default: case 'object': { From fed708346bdb6d780e13ebf409dcb0dd594eee69 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Sat, 11 May 2024 23:11:03 +0800 Subject: [PATCH 24/24] =?UTF-8?q?=F0=9F=90=9B=20fix:=20fix=20auto=20meta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/features/AgentSetting/store/action.ts | 21 +++++++++++++------ src/store/chat/slices/enchance/action.ts | 25 ++++++++++++++--------- src/utils/fetch.ts | 2 +- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/features/AgentSetting/store/action.ts b/src/features/AgentSetting/store/action.ts index 5176fe661253..ad0fb7ca6bde 100644 --- a/src/features/AgentSetting/store/action.ts +++ b/src/features/AgentSetting/store/action.ts @@ -8,6 +8,7 @@ import { TraceNameMap, TracePayload, TraceTopicType } from '@/const/trace'; import { chatService } from '@/services/chat'; import { LobeAgentConfig } from '@/types/agent'; import { MetaData } from '@/types/meta'; +import { MessageTextChunk } from '@/utils/fetch'; import { setNamespace } from '@/utils/storeDebug'; import { SessionLoadingState } from '../store/initialState'; @@ -246,17 +247,25 @@ export const store: StateCreator = (set, g streamUpdateMetaArray: (key: keyof MetaData) => { let value = ''; - return (text: string) => { - value += text; - get().dispatchMeta({ type: 'update', value: { [key]: value.split(',') } }); + return (chunk: MessageTextChunk) => { + switch (chunk.type) { 
+          case 'text': {
+            value += chunk.text;
+            get().dispatchMeta({ type: 'update', value: { [key]: value.split(',') } });
+          }
+        }
       };
     },
 
     streamUpdateMetaString: (key: keyof MetaData) => {
       let value = '';
 
-      return (text: string) => {
-        value += text;
-        get().dispatchMeta({ type: 'update', value: { [key]: value } });
+      return (chunk: MessageTextChunk) => {
+        switch (chunk.type) {
+          case 'text': {
+            value += chunk.text;
+            get().dispatchMeta({ type: 'update', value: { [key]: value } });
+          }
+        }
       };
     },
 
diff --git a/src/store/chat/slices/enchance/action.ts b/src/store/chat/slices/enchance/action.ts
index 778fe5246b95..b28f0a69654e 100644
--- a/src/store/chat/slices/enchance/action.ts
+++ b/src/store/chat/slices/enchance/action.ts
@@ -76,16 +76,21 @@ export const chatEnhance: StateCreator<
 
     // translate to target language
     await chatService.fetchPresetTaskResult({
-      onMessageHandle: (text) => {
-        internal_dispatchMessage({
-          id,
-          key: 'translate',
-          type: 'updateMessageExtra',
-          value: produce({ content: '', from, to: targetLang }, (draft) => {
-            content += text;
-            draft.content += content;
-          }),
-        });
+      onMessageHandle: (chunk) => {
+        switch (chunk.type) {
+          case 'text': {
+            internal_dispatchMessage({
+              id,
+              key: 'translate',
+              type: 'updateMessageExtra',
+              value: produce({ content: '', from, to: targetLang }, (draft) => {
+                content += chunk.text;
+                draft.content += content;
+              }),
+            });
+            break;
+          }
+        }
       },
       params: chainTranslate(message.content, targetLang),
       trace: get().getCurrentTracePayload({ traceName: TraceNameMap.Translator }),
diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts
index bc2d70aa16ed..748cf044fe86 100644
--- a/src/utils/fetch.ts
+++ b/src/utils/fetch.ts
@@ -45,7 +45,7 @@ export type OnFinishHandler = (
   },
 ) => Promise<void>;
 
-interface MessageTextChunk {
+export interface MessageTextChunk {
   text: string;
   type: 'text';
 }
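
For reference, the stream tests above all assert one wire format: each protocol chunk is flushed as an `id:` line, an `event:` line, and a JSON-encoded `data:` line, followed by a blank line. A minimal TypeScript sketch of that serialization (the chunk shape and the transformer name below are assumptions inferred from the expected test output, not the patch's actual exports):

// Illustrative sketch only: serialize protocol chunks into the SSE frames the tests decode.
interface StreamProtocolChunk {
  data: unknown; // JSON-serializable payload, e.g. 'Hello' or a tool_calls array
  id: string; // stable per-response id, e.g. 'chat_1'
  type: 'text' | 'tool_calls' | 'stop' | 'data';
}

const createSSEProtocolTransformer = () => {
  const encoder = new TextEncoder();

  return new TransformStream<StreamProtocolChunk, Uint8Array>({
    transform(chunk, controller) {
      // Every chunk becomes three SSE lines terminated by a blank line, matching
      // the exact strings the tests collect and compare.
      controller.enqueue(encoder.encode(`id: ${chunk.id}\n`));
      controller.enqueue(encoder.encode(`event: ${chunk.type}\n`));
      controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk.data)}\n\n`));
    },
  });
};

Piping provider chunks mapped into this shape through such a transformer yields exactly the 'id: chat_1\n' / 'event: text\n' / 'data: "Hello"\n\n' sequences asserted throughout, which is what lets every provider share the same fetchSSE consumer on the client.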