From 5d11c4f9699844d86d9512fedc5b10ea92d8e223 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Tue, 7 May 2024 10:00:43 +0000 Subject: [PATCH 01/24] =?UTF-8?q?=E2=9C=A8=20feat:=20finish=20OpenAI=20str?= =?UTF-8?q?eam=20tool=5Fcalls=20protocol?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 1 + src/database/client/schemas/message.ts | 12 +- src/features/Conversation/Actions/index.ts | 1 + .../Conversation/Messages/Assistant.tsx | 26 --- .../Messages/Assistant/ToolCalls/Settings.tsx | 39 +++++ .../Messages/Assistant/ToolCalls/index.tsx | 72 ++++++++ .../Messages/Assistant/ToolCalls/style.ts | 25 +++ .../Conversation/Messages/Assistant/index.tsx | 36 ++++ .../Conversation/Messages/Tool/index.tsx | 39 +++++ src/features/Conversation/Messages/index.ts | 2 + .../components/ChatItem/index.tsx | 4 + src/libs/agent-runtime/types/chat.ts | 5 +- .../agent-runtime/utils/anthropicHelpers.ts | 5 +- .../utils/openaiCompatibleFactory/index.ts | 5 +- src/libs/agent-runtime/utils/response.ts | 12 ++ src/libs/agent-runtime/utils/streams/index.ts | 2 + .../agent-runtime/utils/streams/openai.ts | 26 +++ .../agent-runtime/utils/streams/protocol.ts | 41 +++++ src/locales/default/plugin.ts | 2 + src/services/chat.ts | 79 +++++---- src/store/chat/slices/message/action.test.ts | 11 -- src/store/chat/slices/message/action.ts | 126 ++++++-------- src/store/chat/slices/message/reducer.ts | 19 +- src/store/chat/slices/message/selectors.ts | 10 +- src/store/chat/slices/plugin/action.test.ts | 24 +-- src/store/chat/slices/plugin/action.ts | 163 +++++++++--------- src/types/llm.ts | 2 +- src/types/message/index.ts | 15 +- src/types/message/tools.ts | 57 ++++++ src/types/openai/chat.ts | 6 + src/utils/fetch.test.ts | 93 +++++++++- src/utils/fetch.ts | 140 ++++++++++----- 32 files changed, 795 insertions(+), 305 deletions(-) delete mode 100644 src/features/Conversation/Messages/Assistant.tsx create mode 100644 src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx create mode 100644 src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx create mode 100644 src/features/Conversation/Messages/Assistant/ToolCalls/style.ts create mode 100644 src/features/Conversation/Messages/Assistant/index.tsx create mode 100644 src/features/Conversation/Messages/Tool/index.tsx create mode 100644 src/libs/agent-runtime/utils/response.ts create mode 100644 src/libs/agent-runtime/utils/streams/index.ts create mode 100644 src/libs/agent-runtime/utils/streams/openai.ts create mode 100644 src/libs/agent-runtime/utils/streams/protocol.ts diff --git a/package.json b/package.json index 62798c97c1ac..3ddfca595c23 100644 --- a/package.json +++ b/package.json @@ -97,6 +97,7 @@ "@lobehub/icons": "latest", "@lobehub/tts": "latest", "@lobehub/ui": "^1.138.17", + "@microsoft/fetch-event-source": "^2.0.1", "@next/third-parties": "^14.2.3", "@sentry/nextjs": "^7.112.2", "@t3-oss/env-nextjs": "^0.10.1", diff --git a/src/database/client/schemas/message.ts b/src/database/client/schemas/message.ts index f198e0159529..288606953581 100644 --- a/src/database/client/schemas/message.ts +++ b/src/database/client/schemas/message.ts @@ -1,6 +1,8 @@ /* eslint-disable sort-keys-fix/sort-keys-fix */ import { z } from 'zod'; +import { MessageToolCallSchema } from '@/types/message'; + const TranslateSchema = z.object({ from: z.string().optional(), to: z.string(), @@ -14,13 +16,21 @@ const PluginSchema = z.object({ type: z.enum(['default', 'markdown', 'standalone', 'builtin']).default('default'), 
 });
 
+const ToolCallSchema = PluginSchema.extend({
+  id: z.string(),
+});
+
 export const DB_MessageSchema = z.object({
-  role: z.enum(['user', 'system', 'assistant', 'function']),
+  role: z.enum(['user', 'system', 'assistant', 'function', 'tool']),
   content: z.string(),
   files: z.array(z.string()).optional(),
   favorite: z.number().int().min(0).max(1).optional(),
   error: z.any().optional(),
 
+  tool_calls: z.array(MessageToolCallSchema).optional(),
+  tools: z.array(ToolCallSchema).optional(),
+  tool: ToolCallSchema.optional(),
+
   plugin: PluginSchema.optional(),
   pluginState: z.any().optional(),
   fromModel: z.string().optional(),
diff --git a/src/features/Conversation/Actions/index.ts b/src/features/Conversation/Actions/index.ts
index 72c7f1724b9f..71feebab93b2 100644
--- a/src/features/Conversation/Actions/index.ts
+++ b/src/features/Conversation/Actions/index.ts
@@ -15,6 +15,7 @@ export const renderActions: Record<string, RenderAction> = {
   assistant: AssistantActionsBar,
   function: FunctionActionsBar,
   system: DefaultActionsBar,
+  tool: FunctionActionsBar,
   user: UserActionsBar,
 };
 
diff --git a/src/features/Conversation/Messages/Assistant.tsx b/src/features/Conversation/Messages/Assistant.tsx
deleted file mode 100644
index 6a64f124aa6c..000000000000
--- a/src/features/Conversation/Messages/Assistant.tsx
+++ /dev/null
@@ -1,26 +0,0 @@
-import { ReactNode, memo } from 'react';
-
-import { isFunctionMessageAtStart } from '@/const/message';
-import { useChatStore } from '@/store/chat';
-import { chatSelectors } from '@/store/chat/selectors';
-import { ChatMessage } from '@/types/message';
-
-import Inspector from '../Plugins/Inspector';
-import { DefaultMessage } from './Default';
-
-export const AssistantMessage = memo<
-  ChatMessage & {
-    editableContent: ReactNode;
-  }
->(({ id, plugin, content, ...props }) => {
-  const fcProps = useChatStore(chatSelectors.getFunctionMessageProps({ content, id, plugin }));
-
-  if (!isFunctionMessageAtStart(content))
-    return <DefaultMessage content={content} id={id} {...props} />;
-
-  return (
-    <div id={id}>
-      <Inspector {...fcProps} />
-    </div>
-  );
-});
diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx b/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx
new file mode 100644
index 000000000000..e9183dfe24cf
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx
@@ -0,0 +1,39 @@
+import { ActionIcon } from '@lobehub/ui';
+import { LucideSettings } from 'lucide-react';
+import { memo, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+
+import PluginDetailModal from '@/features/PluginDetailModal';
+import { pluginHelpers, useToolStore } from '@/store/tool';
+import { pluginSelectors } from '@/store/tool/selectors';
+
+const Settings = memo<{ id: string }>(({ id }) => {
+  const item = useToolStore(pluginSelectors.getPluginManifestById(id));
+  const [open, setOpen] = useState(false);
+  const { t } = useTranslation('plugin');
+  const hasSettings = pluginHelpers.isSettingSchemaNonEmpty(item?.settings);
+
+  return (
+    hasSettings && (
+      <>
+        <ActionIcon
+          icon={LucideSettings}
+          onClick={() => {
+            setOpen(true);
+          }}
+          title={t('setting')}
+        />
+        <PluginDetailModal
+          id={id}
+          onClose={() => {
+            setOpen(false);
+          }}
+          open={open}
+          schema={item?.settings}
+        />
+      </>
+    )
+  );
+});
+
+export default Settings;
diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx
new file mode 100644
index 000000000000..681778525e1f
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx
@@ -0,0 +1,72 @@
+import { Loading3QuartersOutlined } from '@ant-design/icons';
+import { Avatar, Highlighter, Icon } from '@lobehub/ui';
+import isEqual from 'fast-deep-equal';
+import { LucideChevronDown, LucideChevronUp, LucideToyBrick } from 'lucide-react';
+import { memo, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { Flexbox } from 'react-layout-kit';
+
+import { pluginHelpers, useToolStore } from '@/store/tool';
+import { toolSelectors } from '@/store/tool/selectors';
+
+import { useStyles } from './style';
+
+export interface InspectorProps {
+  arguments?: string;
+  identifier: string;
+  loading?: boolean;
+}
+
+const Inspector = memo(({ arguments: requestArgs = '{}', loading, identifier }) => {
+  const { t } = useTranslation('plugin');
+  const { styles } = useStyles();
+  const [open, setOpen] = useState(false);
+
+  const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual);
+
+  const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta);
+
+  const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('plugins.loading');
+
+  const avatar = pluginAvatar ? (
+    <Avatar avatar={pluginAvatar} size={24} />
+  ) : (
+    <Icon icon={LucideToyBrick} />
+  );
+
+  let params;
+  try {
+    params = JSON.stringify(JSON.parse(requestArgs), null, 2);
+  } catch {
+    params = requestArgs;
+  }
+
+  return (
+    <Flexbox gap={8}>
+      <Flexbox
+        align={'center'}
+        className={styles.container}
+        gap={8}
+        horizontal
+        onClick={() => {
+          setOpen(!open);
+        }}
+      >
+        {loading ? (
+          <div>
+            <Icon icon={Loading3QuartersOutlined} spin />
+          </div>
+        ) : (
+          avatar
+        )}
+        {pluginTitle}
+        <Icon icon={open ? LucideChevronUp : LucideChevronDown} />
+      </Flexbox>
+      {open && <Highlighter language={'json'}>{params}</Highlighter>}
+    </Flexbox>
+  );
+});
+
+export default Inspector;
diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/style.ts b/src/features/Conversation/Messages/Assistant/ToolCalls/style.ts
new file mode 100644
index 000000000000..3cef48295aa4
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/ToolCalls/style.ts
@@ -0,0 +1,25 @@
+import { createStyles } from 'antd-style';
+
+export const useStyles = createStyles(({ css, token }) => ({
+  container: css`
+    cursor: pointer;
+
+    width: fit-content;
+    padding-inline: 4px 6px;
+
+    color: ${token.colorText};
+
+    background: ${token.colorFillTertiary};
+    border-radius: 8px;
+
+    &:hover {
+      background: ${token.colorFillSecondary};
+    }
+  `,
+  plugin: css`
+    display: flex;
+    gap: 4px;
+    align-items: center;
+    width: fit-content;
+  `,
+}));
diff --git a/src/features/Conversation/Messages/Assistant/index.tsx b/src/features/Conversation/Messages/Assistant/index.tsx
new file mode 100644
index 000000000000..d9ed3b78fb98
--- /dev/null
+++ b/src/features/Conversation/Messages/Assistant/index.tsx
@@ -0,0 +1,36 @@
+import { ReactNode, memo } from 'react';
+import { Flexbox } from 'react-layout-kit';
+
+import { useChatStore } from '@/store/chat';
+import { chatSelectors } from '@/store/chat/selectors';
+import { ChatMessage } from '@/types/message';
+
+import { DefaultMessage } from '../Default';
+import ToolCalls from './ToolCalls';
+
+export const AssistantMessage = memo<
+  ChatMessage & {
+    editableContent: ReactNode;
+  }
+>(({ id, tools, content, ...props }) => {
+  const editing = useChatStore(chatSelectors.isMessageEditing(id));
+
+  return (
+    <Flexbox gap={8} id={id}>
+      {(content || editing) && (
+        <DefaultMessage content={content} id={id} {...props} />
+      )}
+      {!editing && (
+        <Flexbox gap={8}>
+          {tools?.map((toolCall) => (
+            <ToolCalls
+              arguments={toolCall.arguments}
+              identifier={toolCall.identifier}
+              key={toolCall.id}
+            />
+          ))}
+        </Flexbox>
+      )}
+    </Flexbox>
+  );
+});
diff --git a/src/features/Conversation/Messages/Tool/index.tsx b/src/features/Conversation/Messages/Tool/index.tsx
new file mode 100644
index 000000000000..86683a46cca9
--- /dev/null
+++ b/src/features/Conversation/Messages/Tool/index.tsx
@@ -0,0 +1,39 @@
+import isEqual from 'fast-deep-equal';
+import { memo, useState } from 'react';
+import { Flexbox } from 'react-layout-kit';
+
+import { LOADING_FLAT } from '@/const/message';
+import { useChatStore } from '@/store/chat';
+import { chatSelectors } from '@/store/chat/selectors';
+import { ChatMessage } from '@/types/message';
+
+import Inspector from '../../Plugins/Inspector';
+import PluginRender from '../../Plugins/Render';
+import BubblesLoading from '../../components/BubblesLoading';
+
+export const ToolMessage = memo<ChatMessage>(({ id, content, tool }) => {
+  const fcProps = useChatStore(
+    chatSelectors.getFunctionMessageProps({ content, id, plugin: tool }),
+    isEqual,
+  );
+
+  const [showRender, setShow] = useState(true);
+
+  if (content === LOADING_FLAT) return <BubblesLoading />;
+
+  return (
+    <Flexbox gap={12} id={id}>
+      <Inspector setShow={setShow} showRender={showRender} {...fcProps} />
+      {showRender && (
+        <PluginRender
+          content={content}
+          id={id}
+          identifier={tool?.identifier}
+          loading={fcProps.loading}
+          payload={tool}
+          type={tool?.type}
+        />
+      )}
+    </Flexbox>
+  );
+});
diff --git a/src/features/Conversation/Messages/index.ts b/src/features/Conversation/Messages/index.ts
index 273223593c66..7a0469eef0c9 100644
--- a/src/features/Conversation/Messages/index.ts
+++ b/src/features/Conversation/Messages/index.ts
@@ -7,12 +7,14 @@ import { OnAvatarsClick, RenderMessage } from '../types';
 import { AssistantMessage } from './Assistant';
 import { DefaultMessage } from './Default';
 import { FunctionMessage } from './Function';
+import { ToolMessage } from './Tool';
 import { UserMessage } from './User';
 
 export const renderMessages: Record<string, RenderMessage> = {
   assistant: AssistantMessage,
   default: DefaultMessage,
   function: FunctionMessage,
+  tool: ToolMessage,
   user: UserMessage,
 };
 
diff --git 
a/src/features/Conversation/components/ChatItem/index.tsx b/src/features/Conversation/components/ChatItem/index.tsx index 30eecbf31835..89545ee06fea 100644 --- a/src/features/Conversation/components/ChatItem/index.tsx +++ b/src/features/Conversation/components/ChatItem/index.tsx @@ -56,6 +56,10 @@ const Item = memo(({ index, id }) => { }, isEqual); const historyLength = useChatStore((s) => chatSelectors.currentChats(s).length); + const [editing, toggleMessageEditing] = useChatStore((s) => [ + chatSelectors.isMessageEditing(id)(s), + s.toggleMessageEditing, + ]); const [isMessageLoading, generating, editing, toggleMessageEditing, updateMessageContent] = useChatStore((s) => [ diff --git a/src/libs/agent-runtime/types/chat.ts b/src/libs/agent-runtime/types/chat.ts index a32951793aed..b7b5eb918d1d 100644 --- a/src/libs/agent-runtime/types/chat.ts +++ b/src/libs/agent-runtime/types/chat.ts @@ -1,6 +1,8 @@ import { OpenAIStreamCallbacks } from 'ai'; -export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function'; +import { MessageToolCall } from '@/types/message'; + +export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function' | 'tool'; interface UserMessageContentPartText { text: string; @@ -30,6 +32,7 @@ export interface OpenAIChatMessage { * @description 消息发送者的角色 */ role: LLMRoleType; + tool_calls?: MessageToolCall[]; } /** diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.ts b/src/libs/agent-runtime/utils/anthropicHelpers.ts index 9a1bab628737..0528f296a38f 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.ts @@ -32,7 +32,10 @@ export const buildAnthropicMessage = ( const content = message.content as string | UserMessageContentPart[]; return { content: typeof content === 'string' ? content : content.map((c) => buildAnthropicBlock(c)), - role: message.role === 'function' || message.role === 'system' ? 'assistant' : message.role, + role: + message.role === 'tool' || message.role === 'function' || message.role === 'system' + ? 
'assistant' + : message.role, }; }; diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts index 89a4c8ec2fe6..1c58bf5b1723 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts @@ -1,4 +1,3 @@ -import { OpenAIStream, StreamingTextResponse } from 'ai'; import OpenAI, { ClientOptions } from 'openai'; import { LOBE_DEFAULT_MODEL_LIST } from '@/config/modelProviders'; @@ -11,6 +10,8 @@ import { AgentRuntimeError } from '../createError'; import { debugStream } from '../debugStream'; import { desensitizeUrl } from '../desensitizeUrl'; import { handleOpenAIError } from '../handleOpenAIError'; +import { StreamingResponse } from '../response'; +import { OpenAIStream } from '../streams'; // the model contains the following keywords is not a chat model, so we should filter them out const CHAT_MODELS_BLOCK_LIST = [ @@ -86,7 +87,7 @@ export const LobeOpenAICompatibleFactory = ({ debugStream(useForDebug.toReadableStream()).catch(console.error); } - return new StreamingTextResponse(OpenAIStream(prod, options?.callback), { + return StreamingResponse(OpenAIStream(prod, options?.callback), { headers: options?.headers, }); } catch (error) { diff --git a/src/libs/agent-runtime/utils/response.ts b/src/libs/agent-runtime/utils/response.ts new file mode 100644 index 000000000000..f8de781c9e2d --- /dev/null +++ b/src/libs/agent-runtime/utils/response.ts @@ -0,0 +1,12 @@ +export const StreamingResponse = ( + stream: ReadableStream, + options?: { headers?: Record }, +) => { + return new Response(stream, { + headers: { + 'Cache-Control': 'no-cache', + 'Content-Type': 'text/event-stream', + ...options?.headers, + }, + }); +}; diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts new file mode 100644 index 000000000000..0a8bed033dba --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -0,0 +1,2 @@ +export * from './openai'; +export * from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts new file mode 100644 index 000000000000..dd8010fffe36 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -0,0 +1,26 @@ +import { createCallbacksTransformer } from 'ai'; +import OpenAI from 'openai'; +import type { Stream } from 'openai/streaming'; + +import { ChatStreamCallbacks } from '../../types'; +import { transformOpenAIStream } from './protocol'; + +export const OpenAIStream = ( + stream: Stream, + callbacks?: ChatStreamCallbacks, +) => { + return stream + .toReadableStream() + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformOpenAIStream(chunk); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts new file mode 100644 index 000000000000..d2a5b873147f --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -0,0 +1,41 @@ +import OpenAI from 'openai'; + +interface StreamProtocolChunk { + data: any; + id: string; + type: 'text' | 'tool_calls' | 'data' | 'stop'; +} + +export const transformOpenAIStream = (chunk: Uint8Array): StreamProtocolChunk => { + 
const decoder = new TextDecoder(); + + const chunkValue = decoder.decode(chunk, { stream: true }); + const jsonValue: OpenAI.ChatCompletionChunk = JSON.parse(chunkValue); + + // maybe need another structure to add support for multiple choices + const item = jsonValue.choices[0]; + + if (typeof item.delta.content === 'string') { + return { data: item.delta.content, id: jsonValue.id, type: 'text' }; + } + + if (item.delta.tool_calls) { + return { data: item.delta.tool_calls, id: jsonValue.id, type: 'tool_calls' }; + } + + if (item.delta.content === null) { + return { data: item.delta, id: jsonValue.id, type: 'data' }; + } + + // 给定结束原因 + if (item.finish_reason) { + return { data: item.finish_reason, id: jsonValue.id, type: 'stop' }; + } + + // 其余情况下,返回 delta 和 index + return { + data: { delta: item.delta, id: jsonValue.id, index: item.index }, + id: jsonValue.id, + type: 'data', + }; +}; diff --git a/src/locales/default/plugin.ts b/src/locales/default/plugin.ts index 25b4a954b244..cd8a58857183 100644 --- a/src/locales/default/plugin.ts +++ b/src/locales/default/plugin.ts @@ -4,7 +4,9 @@ export default { function_call: '函数调用', off: '关闭调试', on: '查看插件调用信息', + payload: '插件载荷', response: '返回结果', + tool_call: '工具调用请求', }, detailModal: { info: { diff --git a/src/services/chat.ts b/src/services/chat.ts index 2198a063155d..9af430a7f940 100644 --- a/src/services/chat.ts +++ b/src/services/chat.ts @@ -24,13 +24,13 @@ import { ChatErrorType } from '@/types/fetch'; import { ChatMessage } from '@/types/message'; import type { ChatStreamPayload, OpenAIChatMessage } from '@/types/openai/chat'; import { UserMessageContentPart } from '@/types/openai/chat'; -import { FetchSSEOptions, OnFinishHandler, fetchSSE, getMessageError } from '@/utils/fetch'; +import { FetchSSEOptions, fetchSSE, getMessageError } from '@/utils/fetch'; import { createTraceHeader, getTraceId } from '@/utils/trace'; import { createHeaderWithAuth, getProviderAuthPayload } from './_auth'; import { API_ENDPOINTS } from './_url'; -interface FetchOptions { +interface FetchOptions extends FetchSSEOptions { isWelcomeQuestion?: boolean; signal?: AbortSignal | undefined; trace?: TracePayload; @@ -40,23 +40,14 @@ interface GetChatCompletionPayload extends Partial void; - onFinish?: OnFinishHandler; /** * 加载状态变化处理函数 * @param loading - 是否处于加载状态 */ onLoadingChange?: (loading: boolean) => void; - /** - * 消息处理函数 - * @param text - 消息内容 - */ - onMessageHandle?: (text: string) => void; /** * 请求对象 */ @@ -224,20 +215,15 @@ class ChatService { trace, isWelcomeQuestion, }: CreateAssistantMessageStream) => { - await fetchSSE( - () => - this.createAssistantMessage(params, { - isWelcomeQuestion, - signal: abortController?.signal, - trace: this.mapTrace(trace, TraceTagMap.Chat), - }), - { - onAbort, - onErrorHandle, - onFinish, - onMessageHandle, - }, - ); + await this.createAssistantMessage(params, { + isWelcomeQuestion, + onAbort, + onErrorHandle, + onFinish, + onMessageHandle, + signal: abortController?.signal, + trace: this.mapTrace(trace, TraceTagMap.Chat), + }); }; getChatCompletion = async (params: Partial, options?: FetchOptions) => { @@ -299,10 +285,14 @@ class ChatService { provider, }); - return fetch(API_ENDPOINTS.chat(provider), { + return fetchSSE(API_ENDPOINTS.chat(provider), { body: JSON.stringify(payload), headers, method: 'POST', + onAbort: options?.onAbort, + onErrorHandle: options?.onErrorHandle, + onFinish: options?.onFinish, + onMessageHandle: options?.onMessageHandle, signal, }); }; @@ -360,20 +350,15 @@ class ChatService { 
onLoadingChange?.(true); - const data = await fetchSSE( - () => - this.getChatCompletion(params, { - signal: abortController?.signal, - trace: this.mapTrace(trace, TraceTagMap.SystemChain), - }), - { - onErrorHandle: (error) => { - errorHandle(new Error(error.message), error); - }, - onFinish, - onMessageHandle, + const data = await this.getChatCompletion(params, { + onErrorHandle: (error) => { + errorHandle(new Error(error.message), error); }, - ).catch(errorHandle); + onFinish, + onMessageHandle, + signal: abortController?.signal, + trace: this.mapTrace(trace, TraceTagMap.SystemChain), + }).catch(errorHandle); onLoadingChange?.(false); @@ -424,11 +409,25 @@ class ChatService { return { content: getContent(m), role: m.role }; } + case 'assistant': { + return { content: m.content, role: m.role, tool_calls: m.tool_calls }; + } + + // TODO: need to be removed after upgrade case 'function': { const name = m.plugin?.identifier as string; return { content: m.content, name, role: m.role }; } + case 'tool': { + return { + content: m.content, + name: m.tool_calls?.find((tool) => tool.id === m.tool?.id)?.function.name, + role: m.role, + tool_call_id: m.tool?.id, + }; + } + default: { return { content: m.content, role: m.role }; } diff --git a/src/store/chat/slices/message/action.test.ts b/src/store/chat/slices/message/action.test.ts index a6355ba96850..d6d6f634ac3d 100644 --- a/src/store/chat/slices/message/action.test.ts +++ b/src/store/chat/slices/message/action.test.ts @@ -645,10 +645,7 @@ describe('chatMessage actions', () => { messages, assistantMessageId, ); - expect(response.content).toEqual(aiResponse); expect(response.isFunctionCall).toEqual(false); - expect(response.functionCallAtEnd).toEqual(false); - expect(response.functionCallContent).toEqual(''); }); }); @@ -667,10 +664,7 @@ describe('chatMessage actions', () => { messages, assistantMessageId, ); - expect(response.content).toEqual(aiResponse); expect(response.isFunctionCall).toEqual(true); - expect(response.functionCallAtEnd).toEqual(false); - expect(response.functionCallContent).toEqual(''); }); }); @@ -689,12 +683,7 @@ describe('chatMessage actions', () => { messages, assistantMessageId, ); - expect(response.content).toEqual(aiResponse); expect(response.isFunctionCall).toEqual(true); - expect(response.functionCallAtEnd).toEqual(true); - expect(response.functionCallContent).toEqual( - '{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]}', - ); }); }); diff --git a/src/store/chat/slices/message/action.ts b/src/store/chat/slices/message/action.ts index b256bcb89b81..1409db34a38d 100644 --- a/src/store/chat/slices/message/action.ts +++ b/src/store/chat/slices/message/action.ts @@ -6,7 +6,7 @@ import { template } from 'lodash-es'; import { SWRResponse, mutate } from 'swr'; import { StateCreator } from 'zustand/vanilla'; -import { LOADING_FLAT, isFunctionMessageAtStart, testFunctionMessageAtEnd } from '@/const/message'; +import { LOADING_FLAT } from '@/const/message'; import { TraceEventType, TraceNameMap } from '@/const/trace'; import { useClientDataSWR } from '@/libs/swr'; import { chatService } from '@/services/chat'; @@ -17,7 +17,7 @@ import { useAgentStore } from '@/store/agent'; import { agentSelectors } from '@/store/agent/selectors'; import { chatHelpers } from '@/store/chat/helpers'; import { ChatStore } from '@/store/chat/store'; -import { ChatMessage } from '@/types/message'; +import { ChatMessage, MessageToolCall } from '@/types/message'; import { 
TraceEventPayloads } from '@/types/trace'; import { setNamespace } from '@/utils/storeDebug'; import { nanoid } from '@/utils/uuid'; @@ -105,9 +105,6 @@ export interface ChatMessageAction { assistantMessageId: string, params?: ProcessMessageParams, ) => Promise<{ - content: string; - functionCallAtEnd: boolean; - functionCallContent: string; isFunctionCall: boolean; traceId?: string; }>; @@ -123,7 +120,11 @@ export interface ChatMessageAction { * @param id * @param content */ - internal_updateMessageContent: (id: string, content: string) => Promise; + internal_updateMessageContent: ( + id: string, + content: string, + toolCalls?: MessageToolCall[], + ) => Promise; internal_createMessage: (params: CreateMessageParams) => Promise; internal_resendMessage: (id: string, traceId?: string) => Promise; internal_traceMessage: (id: string, payload: TraceEventPayloads) => Promise; @@ -310,8 +311,7 @@ export const chatMessage: StateCreator< // the internal process method of the AI message internal_coreProcessMessage: async (messages, userMessageId, params) => { - const { internal_fetchAIChatMessage, triggerFunctionCall, refreshMessages, activeTopicId } = - get(); + const { internal_fetchAIChatMessage, triggerToolCalls, refreshMessages, activeTopicId } = get(); const { model, provider } = getAgentConfig(); @@ -327,39 +327,15 @@ export const chatMessage: StateCreator< topicId: activeTopicId, // if there is activeTopicId,then add it to topicId }; - const mid = await get().internal_createMessage(assistantMessage); + const assistantId = await get().internal_createMessage(assistantMessage); // 2. fetch the AI response - const { isFunctionCall, content, functionCallAtEnd, functionCallContent, traceId } = - await internal_fetchAIChatMessage(messages, mid, params); + const { isFunctionCall } = await internal_fetchAIChatMessage(messages, assistantId, params); // 3. 
if it's the function call message, trigger the function method if (isFunctionCall) { - let functionId = mid; - - // if the function call is at the end of the message, then create a new function message - if (functionCallAtEnd) { - // create a new separate message and remove the function call from the prev message - - await get().internal_updateMessageContent(mid, content.replace(functionCallContent, '')); - - const functionMessage: CreateMessageParams = { - role: 'function', - content: functionCallContent, - fromModel: model, - fromProvider: provider, - - parentId: userMessageId, - sessionId: get().activeId, - topicId: activeTopicId, - traceId, - }; - - functionId = await get().internal_createMessage(functionMessage); - } - await refreshMessages(); - await triggerFunctionCall(functionId); + await triggerToolCalls(assistantId); } }, internal_dispatchMessage: (payload) => { @@ -369,7 +345,7 @@ export const chatMessage: StateCreator< const messages = messagesReducer(get().messages, payload); - set({ messages }, false, n(`dispatchMessage/${payload.type}`, payload)); + set({ messages }, false, { type: `dispatchMessage/${payload.type}`, payload }); }, internal_fetchAIChatMessage: async (messages, assistantId, params) => { const { @@ -432,10 +408,7 @@ export const chatMessage: StateCreator< config.params.max_tokens = 2048; } - let output = ''; let isFunctionCall = false; - let functionCallAtEnd = false; - let functionCallContent = ''; let msgTraceId: string | undefined; const { startAnimation, stopAnimation, outputQueue, isAnimationActive } = @@ -464,7 +437,7 @@ export const chatMessage: StateCreator< onAbort: async () => { stopAnimation(); }, - onFinish: async (content, { traceId, observationId }) => { + onFinish: async (content, { traceId, observationId, toolCalls }) => { stopAnimation(); // if there is traceId, update it if (traceId) { @@ -483,22 +456,27 @@ export const chatMessage: StateCreator< } // update the content after fetch result - await internal_updateMessageContent(assistantId, content); + await internal_updateMessageContent(assistantId, content, toolCalls); }, - onMessageHandle: async (text) => { - output += text; - outputQueue.push(...text.split('')); - - // is this message is just a function call - if (isFunctionMessageAtStart(output)) { - stopAnimation(); - internal_dispatchMessage({ - id: assistantId, - key: 'content', - type: 'updateMessage', - value: output, - }); - isFunctionCall = true; + onMessageHandle: async (chunk) => { + switch (chunk.type) { + case 'text': { + outputQueue.push(...chunk.text.split('')); + break; + } + + // is this message is just a tool call + case 'tool_calls': { + internal_dispatchMessage({ + id: assistantId, + type: 'updateMessages', + value: { + tool_calls: chunk.tool_calls, + tools: get().internal_transformToolCalls(chunk.tool_calls), + }, + }); + isFunctionCall = true; + } } // if it's the first time to receive the message, @@ -510,23 +488,7 @@ export const chatMessage: StateCreator< internal_toggleChatLoading(false, undefined, n('generateMessage(end)') as string); - // also exist message like this: - // 请稍等,我帮您查询一下。{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]} - if (!isFunctionCall) { - const { content, valid } = testFunctionMessageAtEnd(output); - - // if fc at end, replace the message - if (valid) { - isFunctionCall = true; - functionCallAtEnd = true; - functionCallContent = content; - } - } - return { - content: output, - functionCallAtEnd, - functionCallContent, 
isFunctionCall, traceId: msgTraceId, }; @@ -610,15 +572,27 @@ export const chatMessage: StateCreator< await internal_coreProcessMessage(contextMessages, latestMsg.id, { traceId }); }, - internal_updateMessageContent: async (id, content) => { - const { internal_dispatchMessage, refreshMessages } = get(); + internal_updateMessageContent: async (id, content, toolCalls) => { + const { internal_dispatchMessage, refreshMessages, internal_transformToolCalls } = get(); // Due to the async update method and refresh need about 100ms // we need to update the message content at the frontend to avoid the update flick // refs: https://medium.com/@kyledeguzmanx/what-are-optimistic-updates-483662c3e171 - internal_dispatchMessage({ id, key: 'content', type: 'updateMessage', value: content }); + if (toolCalls) { + internal_dispatchMessage({ + id, + type: 'updateMessages', + value: { tool_calls: toolCalls, tools: internal_transformToolCalls(toolCalls) }, + }); + } else { + internal_dispatchMessage({ id, type: 'updateMessages', value: { content } }); + } - await messageService.updateMessage(id, { content }); + await messageService.updateMessage(id, { + content, + tool_calls: toolCalls, + tools: toolCalls ? internal_transformToolCalls(toolCalls) : undefined, + }); await refreshMessages(); }, @@ -685,7 +659,7 @@ export const chatMessage: StateCreator< buffer += charsToAdd; // 更新消息内容,这里可能需要结合实际情况调整 - internal_dispatchMessage({ id, key: 'content', type: 'updateMessage', value: buffer }); + internal_dispatchMessage({ id, type: 'updateMessages', value: { content: buffer } }); // 设置下一个字符的延迟 animationTimeoutId = setTimeout(updateText, 16); // 16 毫秒的延迟模拟打字机效果 diff --git a/src/store/chat/slices/message/reducer.ts b/src/store/chat/slices/message/reducer.ts index ee75d0436b31..0310e4e9df8e 100644 --- a/src/store/chat/slices/message/reducer.ts +++ b/src/store/chat/slices/message/reducer.ts @@ -11,6 +11,13 @@ interface UpdateMessage { type: 'updateMessage'; value: ChatMessage[keyof ChatMessage]; } + +interface UpdateMessages { + id: string; + type: 'updateMessages'; + value: Partial; +} + interface CreateMessage { id: string; type: 'createMessage'; @@ -37,6 +44,7 @@ interface UpdateMessageExtra { export type MessageDispatch = | CreateMessage | UpdateMessage + | UpdateMessages | UpdatePluginState | UpdateMessageExtra | DeleteMessage; @@ -54,6 +62,15 @@ export const messagesReducer = (state: ChatMessage[], payload: MessageDispatch): message.updatedAt = Date.now(); }); } + case 'updateMessages': { + return produce(state, (draftState) => { + const { id, value } = payload; + const index = draftState.findIndex((i) => i.id === id); + if (index < 0) return; + + draftState[index] = merge(draftState[index], { ...value, updatedAt: Date.now() }); + }); + } case 'updateMessageExtra': { return produce(state, (draftState) => { @@ -67,7 +84,7 @@ export const messagesReducer = (state: ChatMessage[], payload: MessageDispatch): message.extra[key] = value; } - message.updateAt = Date.now(); + message.updatedAt = Date.now(); }); } diff --git a/src/store/chat/slices/message/selectors.ts b/src/store/chat/slices/message/selectors.ts index 91307800179e..20bb131b8c13 100644 --- a/src/store/chat/slices/message/selectors.ts +++ b/src/store/chat/slices/message/selectors.ts @@ -28,17 +28,9 @@ const getMeta = (message: ChatMessage) => { return message.meta; } - case 'assistant': { + default: { return sessionMetaSelectors.currentAgentMeta(useSessionStore.getState()); } - - case 'function': { - // TODO: 后续改成将 plugin metadata 写入 message metadata 的方案 - 
return { - avatar: '🧩', - title: 'plugin-unknown', - }; - } } }; diff --git a/src/store/chat/slices/plugin/action.test.ts b/src/store/chat/slices/plugin/action.test.ts index cb7c2647251e..35c6d3cbc26d 100644 --- a/src/store/chat/slices/plugin/action.test.ts +++ b/src/store/chat/slices/plugin/action.test.ts @@ -8,7 +8,7 @@ import { messageService } from '@/services/message'; import { chatSelectors } from '@/store/chat/selectors'; import { useChatStore } from '@/store/chat/store'; import { useToolStore } from '@/store/tool'; -import { ChatPluginPayload } from '@/types/message'; +import { ChatToolPayload } from '@/types/message'; import { LobeTool } from '@/types/tool'; const invokeStandaloneTypePlugin = useChatStore.getState().invokeStandaloneTypePlugin; @@ -172,7 +172,7 @@ describe('ChatPluginAction', () => { }); }); - describe('triggerFunctionCall', () => { + describe('triggerToolCalls', () => { it('should trigger a function call and update the plugin message accordingly', async () => { const messageId = 'message-id'; const messageContent = JSON.stringify({ @@ -207,7 +207,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); expect(chatSelectors.getMessageById).toHaveBeenCalledWith(messageId); @@ -255,7 +255,7 @@ describe('ChatPluginAction', () => { vi.spyOn(result.current, 'refreshMessages'); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); expect(result.current.refreshMessages).toHaveBeenCalled(); @@ -308,7 +308,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); // 验证 refreshMessages 是否被调用 @@ -349,7 +349,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); // 验证 refreshMessages 是否被调用 @@ -393,7 +393,7 @@ describe('ChatPluginAction', () => { const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerFunctionCall(messageId); + await result.current.triggerToolCalls(messageId); }); // 验证 refreshMessages 是否被调用 @@ -510,7 +510,7 @@ describe('ChatPluginAction', () => { const payload = { apiName: 'text2image', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const toolResponse = JSON.stringify({ abc: 'data' }); @@ -557,7 +557,7 @@ describe('ChatPluginAction', () => { const payload = { apiName: 'text2image', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const toolResponse = 'Builtin tool response'; @@ -606,7 +606,7 @@ describe('ChatPluginAction', () => { const payload = { apiName: 'builtinApi', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const error = new Error('Builtin tool failed'); @@ -652,7 +652,7 @@ describe('ChatPluginAction', () => { identifier: 'abc', type: 'markdown', arguments: JSON.stringify({ key: 'value' }), - } as ChatPluginPayload; + } as ChatToolPayload; const messageId = 'message-id'; const 
runPluginApiMock = vi.fn(); @@ -678,7 +678,7 @@ describe('ChatPluginAction', () => { const payload = { identifier: 'pluginName', - } as ChatPluginPayload; + } as ChatToolPayload; act(() => { useToolStore.setState({ diff --git a/src/store/chat/slices/plugin/action.ts b/src/store/chat/slices/plugin/action.ts index 4715b3938538..f928d5b6a2f4 100644 --- a/src/store/chat/slices/plugin/action.ts +++ b/src/store/chat/slices/plugin/action.ts @@ -3,14 +3,14 @@ import { t } from 'i18next'; import { Md5 } from 'ts-md5'; import { StateCreator } from 'zustand/vanilla'; +import { LOADING_FLAT } from '@/const/message'; import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; import { chatService } from '@/services/chat'; import { CreateMessageParams, messageService } from '@/services/message'; import { ChatStore } from '@/store/chat/store'; import { useToolStore } from '@/store/tool'; import { pluginSelectors } from '@/store/tool/selectors'; -import { ChatPluginPayload } from '@/types/message'; -import { OpenAIToolCall } from '@/types/openai/functionCall'; +import { ChatToolPayload, MessageToolCall } from '@/types/message'; import { setNamespace } from '@/utils/storeDebug'; import { chatSelectors } from '../../slices/message/selectors'; @@ -24,13 +24,15 @@ export interface ChatPluginAction { content: string, triggerAiMessage?: boolean, ) => Promise; - invokeBuiltinTool: (id: string, payload: ChatPluginPayload) => Promise; - invokeDefaultTypePlugin: (id: string, payload: any) => Promise; - invokeMarkdownTypePlugin: (id: string, payload: ChatPluginPayload) => Promise; - invokeStandaloneTypePlugin: (id: string, payload: ChatPluginPayload) => Promise; - runPluginApi: (id: string, payload: ChatPluginPayload) => Promise; + internal_transformToolCalls: (toolCalls: MessageToolCall[]) => ChatToolPayload[]; + invokeBuiltinTool: (id: string, payload: ChatToolPayload) => Promise; + invokeDefaultTypePlugin: (id: string, payload: any) => Promise; + invokeMarkdownTypePlugin: (id: string, payload: ChatToolPayload) => Promise; + invokeStandaloneTypePlugin: (id: string, payload: ChatToolPayload) => Promise; + runPluginApi: (id: string, payload: ChatToolPayload) => Promise; triggerAIMessage: (id: string, traceId?: string) => Promise; - triggerFunctionCall: (id: string) => Promise; + triggerToolCalls: (id: string) => Promise; + updatePluginState: (id: string, key: string, value: any) => Promise; } @@ -61,6 +63,35 @@ export const chatPlugin: StateCreator< if (triggerAiMessage) await triggerAIMessage(id); }, + internal_transformToolCalls: (toolCalls) => { + return toolCalls.map((toolCall) => { + let payload: ChatToolPayload; + + const [identifier, apiName, type] = toolCall.function.name.split(PLUGIN_SCHEMA_SEPARATOR); + + payload = { + apiName, + arguments: toolCall.function.arguments, + id: toolCall.id, + identifier, + type: (type ?? 
'default') as any, + }; + + // if the apiName is md5, try to find the correct apiName in the plugins + if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { + const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); + const manifest = pluginSelectors.getPluginManifestById(identifier)(useToolStore.getState()); + + const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); + if (api) { + payload.apiName = api.name; + } + } + + return payload; + }); + }, + invokeBuiltinTool: async (id, payload) => { const { internal_toggleChatLoading, internal_updateMessageContent } = get(); const params = JSON.parse(payload.arguments); @@ -94,14 +125,13 @@ export const chatPlugin: StateCreator< }, invokeDefaultTypePlugin: async (id, payload) => { - const { runPluginApi, triggerAIMessage } = get(); + const { runPluginApi } = get(); const data = await runPluginApi(id, payload); if (!data) return; - const traceId = chatSelectors.getTraceIdByMessageId(id)(get()); - await triggerAIMessage(id, traceId); + return data; }, invokeMarkdownTypePlugin: async (id, payload) => { @@ -181,89 +211,66 @@ export const chatPlugin: StateCreator< await internal_coreProcessMessage(chats, id, { traceId }); }, - triggerFunctionCall: async (id) => { - const message = chatSelectors.getMessageById(id)(get()); - if (!message) return; + triggerToolCalls: async (assistantId) => { + const message = chatSelectors.getMessageById(assistantId)(get()); + if (!message || !message.tools) return; const { invokeDefaultTypePlugin, invokeMarkdownTypePlugin, invokeStandaloneTypePlugin, invokeBuiltinTool, - refreshMessages, - internal_resendMessage, - deleteMessage, + triggerAIMessage, } = get(); - let payload = { apiName: '', identifier: '' } as ChatPluginPayload; - - // 识别到内容是 function_call 的情况下 - // 将 function_call 转换为 plugin request payload - if (message.content) { - const { tool_calls } = JSON.parse(message.content) as { - tool_calls: OpenAIToolCall[]; - }; - - const function_call = tool_calls[0].function; - - const [identifier, apiName, type] = function_call.name.split(PLUGIN_SCHEMA_SEPARATOR); - - payload = { - apiName, - arguments: function_call.arguments, - identifier, - type: (type ?? 
'default') as any, + let shouldCreateMessage = false; + let latestToolId = ''; + const messagePools = message.tools.map(async (payload) => { + const toolMessage: CreateMessageParams = { + content: LOADING_FLAT, + parentId: assistantId, + role: 'tool', + sessionId: get().activeId, + tool: payload, + topicId: get().activeTopicId, // if there is activeTopicId,then add it to topicId }; - // fix https://github.com/lobehub/lobe-chat/issues/1094, remove and retry after experiencing plugin illusion - if (!apiName) { - internal_resendMessage(id); - deleteMessage(id); - return; - } - - // if the apiName is md5, try to find the correct apiName in the plugins - if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { - const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); - const manifest = pluginSelectors.getPluginManifestById(identifier)(useToolStore.getState()); - - const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); - if (!api) return; - payload.apiName = api.name; + const id = await get().internal_createMessage(toolMessage); + + switch (payload.type) { + case 'standalone': { + await invokeStandaloneTypePlugin(id, payload); + break; + } + + case 'markdown': { + await invokeMarkdownTypePlugin(id, payload); + break; + } + + case 'builtin': { + await invokeBuiltinTool(id, payload); + break; + } + + default: { + const data = await invokeDefaultTypePlugin(id, payload); + if (data) { + shouldCreateMessage = true; + latestToolId = id; + } + } } - } else { - if (message.plugin) payload = message.plugin; - } - - if (!payload.apiName) return; - - await messageService.updateMessage(id, { - content: !!message.content ? '' : undefined, - plugin: payload, - role: 'function', }); - await refreshMessages(); - switch (payload.type) { - case 'standalone': { - await invokeStandaloneTypePlugin(id, payload); - break; - } + await Promise.all(messagePools); - case 'markdown': { - await invokeMarkdownTypePlugin(id, payload); - break; - } + // only default type tool calls should trigger AI message + if (!shouldCreateMessage) return; - case 'builtin': { - await invokeBuiltinTool(id, payload); - break; - } + const traceId = chatSelectors.getTraceIdByMessageId(latestToolId)(get()); - default: { - await invokeDefaultTypePlugin(id, payload); - } - } + await triggerAIMessage(latestToolId, traceId); }, updatePluginState: async (id, key, value) => { diff --git a/src/types/llm.ts b/src/types/llm.ts index 81bfac74c760..2e9d87ef3755 100644 --- a/src/types/llm.ts +++ b/src/types/llm.ts @@ -75,7 +75,7 @@ export interface LLMParams { top_p?: number; } -export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function'; +export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function' | 'tool'; export interface LLMMessage { content: string; diff --git a/src/types/message/index.ts b/src/types/message/index.ts index 0b8b5a1e7a01..c0bed2b61f99 100644 --- a/src/types/message/index.ts +++ b/src/types/message/index.ts @@ -5,7 +5,7 @@ import { ErrorType } from '@/types/fetch'; import { LLMRoleType } from '../llm'; import { BaseDataModel } from '../meta'; -import { ChatPluginPayload } from './tools'; +import { ChatPluginPayload, ChatToolPayload, MessageToolCall } from './tools'; import { Translate } from './translate'; /** @@ -43,6 +43,10 @@ export interface ChatMessage extends BaseDataModel { } & Record; files?: string[]; + /** + * only used in tool calling + */ + name?: string; /** * observation id */ @@ -51,9 +55,12 @@ export interface ChatMessage extends BaseDataModel { * parent message id */ 
parentId?: string; + /** + * @deprecated + */ plugin?: ChatPluginPayload; - pluginState?: any; + /** * quoted other message's id */ @@ -64,6 +71,10 @@ export interface ChatMessage extends BaseDataModel { role: LLMRoleType; sessionId?: string; + tool?: ChatToolPayload; + tool_calls?: MessageToolCall[]; + tools?: ChatToolPayload[]; + /** * 保存到主题的消息 */ diff --git a/src/types/message/tools.ts b/src/types/message/tools.ts index 39a7cb92fb52..ecca26af6a15 100644 --- a/src/types/message/tools.ts +++ b/src/types/message/tools.ts @@ -1,3 +1,6 @@ +import { DeepPartial } from 'utility-types'; +import { z } from 'zod'; + import { LobeToolRenderType } from '@/types/tool'; export interface ChatPluginPayload { @@ -6,3 +9,57 @@ export interface ChatPluginPayload { identifier: string; type: LobeToolRenderType; } + +export interface ChatToolPayload { + apiName: string; + arguments: string; + id: string; + identifier: string; + type: LobeToolRenderType; +} + +/** + * The function that the model called. + */ +export interface ToolFunction { + /** + * The arguments to call the function with, as generated by the model in JSON + * format. Note that the model does not always generate valid JSON, and may + * hallucinate parameters not defined by your function schema. Validate the + * arguments in your code before calling your function. + */ + arguments: string; + + /** + * The name of the function to call. + */ + name: string; +} + +export interface MessageToolCall { + /** + * The function that the model called. + */ + function: ToolFunction; + + /** + * The ID of the tool call. + */ + id: string; + + /** + * The type of the tool. Currently, only `function` is supported. + */ + type: 'function' | string; +} + +export type MessageToolCallChunk = DeepPartial & { index: number }; + +export const MessageToolCallSchema = z.object({ + function: z.object({ + arguments: z.string(), + name: z.string(), + }), + id: z.string(), + type: z.string(), +}); diff --git a/src/types/openai/chat.ts b/src/types/openai/chat.ts index e56d9ad48c0d..7e4c9bd7fd08 100644 --- a/src/types/openai/chat.ts +++ b/src/types/openai/chat.ts @@ -1,4 +1,5 @@ import { LLMRoleType } from '@/types/llm'; +import { MessageToolCall } from '@/types/message'; import { OpenAIFunctionCall } from './functionCall'; @@ -23,6 +24,9 @@ export interface OpenAIChatMessage { */ content: string | UserMessageContentPart[]; + /** + * @deprecated + */ function_call?: OpenAIFunctionCall; name?: string; /** @@ -30,6 +34,8 @@ export interface OpenAIChatMessage { * @description 消息发送者的角色 */ role: LLMRoleType; + tool_call_id?: string; + tool_calls?: MessageToolCall[]; } /** diff --git a/src/utils/fetch.test.ts b/src/utils/fetch.test.ts index 57187b283e62..954c08551987 100644 --- a/src/utils/fetch.test.ts +++ b/src/utils/fetch.test.ts @@ -2,7 +2,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import { ErrorResponse } from '@/types/fetch'; -import { getMessageError } from './fetch'; +import { getMessageError, parseToolCalls } from './fetch'; // 模拟 i18next vi.mock('i18next', () => ({ @@ -77,3 +77,94 @@ describe('getMessageError', () => { expect(mockResponse.json).toHaveBeenCalled(); }); }); + +describe('parseToolCalls', () => { + it('should create add new item', () => { + const chunk = [ + { index: 0, id: '1', type: 'function', function: { name: 'func', arguments: '' } }, + ]; + + const result = parseToolCalls([], chunk); + expect(result).toEqual([ + { id: '1', type: 'function', function: { name: 'func', arguments: '' } }, + ]); + }); + + it('should update 
arguments if there is a toolCall', () => { + const origin = [{ id: '1', type: 'function', function: { name: 'func', arguments: '' } }]; + + const chunk1 = [{ index: 0, function: { arguments: '{"lo' } }]; + + const result1 = parseToolCalls(origin, chunk1); + expect(result1).toEqual([ + { id: '1', type: 'function', function: { name: 'func', arguments: '{"lo' } }, + ]); + + const chunk2 = [{ index: 0, function: { arguments: 'cation\\": \\"Hangzhou\\"}' } }]; + const result2 = parseToolCalls(result1, chunk2); + + expect(result2).toEqual([ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + ]); + }); + + it('should add a new tool call if the index is different', () => { + const origin = [ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + ]; + + const chunk = [ + { + index: 1, + id: '2', + type: 'function', + function: { name: 'func', arguments: '' }, + }, + ]; + + const result1 = parseToolCalls(origin, chunk); + expect(result1).toEqual([ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + { id: '2', type: 'function', function: { name: 'func', arguments: '' } }, + ]); + }); + + it('should update correct arguments if there are multi tool calls', () => { + const origin = [ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + { id: '2', type: 'function', function: { name: 'func', arguments: '' } }, + ]; + + const chunk = [{ index: 1, function: { arguments: '{"location\\": \\"Beijing\\"}' } }]; + + const result1 = parseToolCalls(origin, chunk); + expect(result1).toEqual([ + { + id: '1', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Hangzhou\\"}' }, + }, + { + id: '2', + type: 'function', + function: { name: 'func', arguments: '{"location\\": \\"Beijing\\"}' }, + }, + ]); + }); +}); diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts index dcf549dbb72f..5ffc062cc990 100644 --- a/src/utils/fetch.ts +++ b/src/utils/fetch.ts @@ -1,8 +1,15 @@ +import { fetchEventSource } from '@microsoft/fetch-event-source'; import { t } from 'i18next'; +import { produce } from 'immer'; import { LOBE_CHAT_OBSERVATION_ID, LOBE_CHAT_TRACE_ID } from '@/const/trace'; import { ErrorResponse, ErrorType } from '@/types/fetch'; -import { ChatMessageError } from '@/types/message'; +import { + ChatMessageError, + MessageToolCall, + MessageToolCallChunk, + MessageToolCallSchema, +} from '@/types/message'; export const getMessageError = async (response: Response) => { let chatMessageError: ChatMessageError; @@ -32,68 +39,115 @@ export type OnFinishHandler = ( text: string, context: { observationId?: string | null; + toolCalls?: MessageToolCall[]; traceId?: string | null; type?: SSEFinishType; }, ) => Promise; +interface MessageTextChunk { + text: string; + type: 'text'; +} + +interface MessageToolCallsChunk { + tool_calls: MessageToolCall[]; + type: 'tool_calls'; +} + export interface FetchSSEOptions { onAbort?: (text: string) => Promise; onErrorHandle?: (error: ChatMessageError) => void; onFinish?: OnFinishHandler; - onMessageHandle?: (text: string) => void; + onMessageHandle?: (chunk: MessageTextChunk | MessageToolCallsChunk) => void; } +export const parseToolCalls = (origin: MessageToolCall[], value: MessageToolCallChunk[]) => + produce(origin, (draft) => { + if (draft.length === 0) { + draft.push(...value.map((item) => 
MessageToolCallSchema.parse(item))); + } else { + value.forEach(({ index, ...item }) => { + if (!draft?.[index]) { + draft?.splice(index, 0, MessageToolCallSchema.parse(item)); + } else { + if (item.function?.arguments) { + draft[index].function.arguments += item.function.arguments; + } + } + }); + } + }); + /** * Fetch data using stream method */ -export const fetchSSE = async (fetchFn: () => Promise, options: FetchSSEOptions = {}) => { - const response = await fetchFn(); - - // 如果不 ok 说明有请求错误 - if (!response.ok) { - const chatMessageError = await getMessageError(response); - - options.onErrorHandle?.(chatMessageError); - return; - } - - const returnRes = response.clone(); - - const data = response.body; - - if (!data) return; +// eslint-disable-next-line no-undef +export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptions = {}) => { let output = ''; - const reader = data.getReader(); - const decoder = new TextDecoder(); + let toolCalls: undefined | MessageToolCall[]; - let done = false; let finishedType: SSEFinishType = 'done'; + let response!: Response; - while (!done) { - try { - const { value, done: doneReading } = await reader.read(); - done = doneReading; - const chunkValue = decoder.decode(value, { stream: true }); - - output += chunkValue; - options.onMessageHandle?.(chunkValue); - } catch (error) { - done = true; - - if ((error as TypeError).name === 'AbortError') { - finishedType = 'abort'; - options?.onAbort?.(output); - } else { - finishedType = 'error'; - console.error(error); - } - } - } + try { + await fetchEventSource(url, { + body: options.body, + headers: options.headers as Record, + method: options.method, + onerror: (error) => { + if ((error as TypeError).name === 'AbortError') { + finishedType = 'abort'; + options?.onAbort?.(output); + } else { + finishedType = 'error'; + console.error(error); + } + throw new Error('Fetch error'); + // options.onErrorHandle() + }, + onmessage: (ev) => { + const data = JSON.parse(ev.data); + switch (ev.event) { + case 'text': { + output += data; + options.onMessageHandle?.({ text: data, type: 'text' }); + break; + } + + case 'tool_calls': { + if (!toolCalls) { + toolCalls = []; + } + + toolCalls = parseToolCalls(toolCalls, data); + + options.onMessageHandle?.({ + tool_calls: toolCalls, + type: 'tool_calls', + }); + } + } + }, + onopen: async (res) => { + response = res.clone(); + + // 如果不 ok 说明有请求错误 + if (!response.ok) { + const chatMessageError = await getMessageError(res); + + options.onErrorHandle?.(chatMessageError); + return; + } + }, + + signal: options.signal, + }); + } catch {} const traceId = response.headers.get(LOBE_CHAT_TRACE_ID); const observationId = response.headers.get(LOBE_CHAT_OBSERVATION_ID); - await options?.onFinish?.(output, { observationId, traceId, type: finishedType }); + await options?.onFinish?.(output, { observationId, toolCalls, traceId, type: finishedType }); - return returnRes; + return response; }; From 24543c1b32b82765ae175913c22f9a69e8a9c77e Mon Sep 17 00:00:00 2001 From: arvinxx Date: Wed, 8 May 2024 00:07:32 +0800 Subject: [PATCH 02/24] =?UTF-8?q?=F0=9F=92=84=20style:=20design=20the=20To?= =?UTF-8?q?ol=20Call=20UI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/database/client/schemas/message.ts | 4 +- src/features/Conversation/Actions/Tool.tsx | 16 +++ src/features/Conversation/Actions/index.ts | 3 +- .../Messages/Assistant/ToolCalls/index.tsx | 84 +++++++------ .../Conversation/Messages/Assistant/index.tsx | 17 ++- 
.../Conversation/Messages/Default.tsx | 5 +- .../Conversation/Messages/Function.tsx | 35 ------ .../Tool}/Inspector/PluginResultJSON.tsx | 0 .../ToolCalls => Tool/Inspector}/Settings.tsx | 0 .../Tool}/Inspector/index.tsx | 69 ++++++----- .../Tool}/Inspector/style.ts | 0 .../Conversation/Messages/Tool/index.tsx | 41 ++++--- src/features/Conversation/Messages/index.ts | 3 +- .../Plugins/Inspector/Settings.tsx | 39 ------ .../Plugins/Render/StandaloneType/Iframe.tsx | 2 +- .../components/ChatItem/index.tsx | 4 - .../Conversation/components/SkeletonList.tsx | 4 +- src/features/Conversation/index.tsx | 5 +- src/locales/default/plugin.ts | 5 +- src/services/__tests__/chat.test.ts | 2 +- src/services/chat.ts | 70 +++++++---- src/store/chat/slices/message/action.test.ts | 111 ++++++------------ src/store/chat/slices/message/action.ts | 33 ++++-- .../chat/slices/message/selectors.test.ts | 106 ++++++----------- src/store/chat/slices/message/selectors.ts | 13 -- src/store/chat/slices/plugin/action.test.ts | 35 ++++-- src/store/chat/slices/plugin/action.ts | 67 ++++++----- src/store/chat/slices/topic/action.ts | 9 +- src/store/tool/selectors/tool.ts | 30 +---- src/types/message/index.ts | 9 +- src/utils/fetch.ts | 29 ++++- src/utils/toolCall.ts | 21 ++++ 32 files changed, 414 insertions(+), 457 deletions(-) create mode 100644 src/features/Conversation/Actions/Tool.tsx delete mode 100644 src/features/Conversation/Messages/Function.tsx rename src/features/Conversation/{Plugins => Messages/Tool}/Inspector/PluginResultJSON.tsx (100%) rename src/features/Conversation/Messages/{Assistant/ToolCalls => Tool/Inspector}/Settings.tsx (100%) rename src/features/Conversation/{Plugins => Messages/Tool}/Inspector/index.tsx (73%) rename src/features/Conversation/{Plugins => Messages/Tool}/Inspector/style.ts (100%) delete mode 100644 src/features/Conversation/Plugins/Inspector/Settings.tsx create mode 100644 src/utils/toolCall.ts diff --git a/src/database/client/schemas/message.ts b/src/database/client/schemas/message.ts index 288606953581..ba4d2cfe493b 100644 --- a/src/database/client/schemas/message.ts +++ b/src/database/client/schemas/message.ts @@ -1,7 +1,6 @@ /* eslint-disable sort-keys-fix/sort-keys-fix */ import { z } from 'zod'; -import { MessageToolCallSchema } from '@/types/message'; const TranslateSchema = z.object({ from: z.string().optional(), @@ -27,9 +26,8 @@ export const DB_MessageSchema = z.object({ favorite: z.number().int().min(0).max(1).optional(), error: z.any().optional(), - tool_calls: z.array(MessageToolCallSchema).optional(), tools: z.array(ToolCallSchema).optional(), - tool: ToolCallSchema.optional(), + tool_call_id: z.string().optional(), plugin: PluginSchema.optional(), pluginState: z.any().optional(), diff --git a/src/features/Conversation/Actions/Tool.tsx b/src/features/Conversation/Actions/Tool.tsx new file mode 100644 index 000000000000..d1f022c76b1a --- /dev/null +++ b/src/features/Conversation/Actions/Tool.tsx @@ -0,0 +1,16 @@ +import { memo } from 'react'; + +import { RenderAction } from '../types'; + +export const ToolActionsBar: RenderAction = memo(() => { + return undefined; + // const { regenerate } = useChatListActionsBar(); + // return ( + // + // ); +}); diff --git a/src/features/Conversation/Actions/index.ts b/src/features/Conversation/Actions/index.ts index 71feebab93b2..dfa380401829 100644 --- a/src/features/Conversation/Actions/index.ts +++ b/src/features/Conversation/Actions/index.ts @@ -9,13 +9,14 @@ import { OnActionsClick, RenderAction } from '../types'; import { 
AssistantActionsBar } from './Assistant'; import { DefaultActionsBar } from './Fallback'; import { FunctionActionsBar } from './Function'; +import { ToolActionsBar } from './Tool'; import { UserActionsBar } from './User'; export const renderActions: Record = { assistant: AssistantActionsBar, function: FunctionActionsBar, system: DefaultActionsBar, - tool: FunctionActionsBar, + tool: ToolActionsBar, user: UserActionsBar, }; diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx index 681778525e1f..980982f2ac99 100644 --- a/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx +++ b/src/features/Conversation/Messages/Assistant/ToolCalls/index.tsx @@ -1,11 +1,12 @@ -import { Loading3QuartersOutlined } from '@ant-design/icons'; import { Avatar, Highlighter, Icon } from '@lobehub/ui'; import isEqual from 'fast-deep-equal'; -import { LucideChevronDown, LucideChevronUp, LucideToyBrick } from 'lucide-react'; +import { Loader2, LucideChevronDown, LucideChevronRight, LucideToyBrick } from 'lucide-react'; import { memo, useState } from 'react'; import { useTranslation } from 'react-i18next'; -import { Flexbox } from 'react-layout-kit'; +import { Center, Flexbox } from 'react-layout-kit'; +import { useChatStore } from '@/store/chat'; +import { chatSelectors } from '@/store/chat/slices/message/selectors'; import { pluginHelpers, useToolStore } from '@/store/tool'; import { toolSelectors } from '@/store/tool/selectors'; @@ -14,59 +15,64 @@ import { useStyles } from './style'; export interface InspectorProps { arguments?: string; identifier: string; - loading?: boolean; + messageId: string; } -const Inspector = memo(({ arguments: requestArgs = '{}', loading, identifier }) => { - const { t } = useTranslation('plugin'); - const { styles } = useStyles(); - const [open, setOpen] = useState(false); +const CallItem = memo( + ({ arguments: requestArgs = '{}', messageId, identifier }) => { + const { t } = useTranslation('plugin'); + const { styles } = useStyles(); + const [open, setOpen] = useState(false); + const loading = useChatStore(chatSelectors.isMessageGenerating(messageId)); - const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual); + const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual); - const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta); + const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta); - const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('plugins.loading'); + const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('unknownPlugin'); - const avatar = pluginAvatar ? ( - - ) : ( - - ); + const avatar = pluginAvatar ? ( + + ) : ( + + ); - let params; - try { - params = JSON.stringify(JSON.parse(requestArgs), null, 2); - } catch { - params = requestArgs; - } + let params; + try { + params = JSON.stringify(JSON.parse(requestArgs), null, 2); + } catch { + params = requestArgs; + } - return ( - - + return ( + { setOpen(!open); }} > - {loading ? ( -
- -
- ) : ( - avatar - )} - {pluginTitle} - + + {loading ? ( +
+ +
+ ) : ( + avatar + )} + {pluginTitle} +
+
+          {(open || loading) && <Highlighter language={'json'}>{params}</Highlighter>}
- {open && {params}} -
- ); -}); + ); + }, +); -export default Inspector; +export default CallItem; diff --git a/src/features/Conversation/Messages/Assistant/index.tsx b/src/features/Conversation/Messages/Assistant/index.tsx index d9ed3b78fb98..8f6afa3f4559 100644 --- a/src/features/Conversation/Messages/Assistant/index.tsx +++ b/src/features/Conversation/Messages/Assistant/index.tsx @@ -1,6 +1,7 @@ import { ReactNode, memo } from 'react'; import { Flexbox } from 'react-layout-kit'; +import { LOADING_FLAT } from '@/const/message'; import { useChatStore } from '@/store/chat'; import { chatSelectors } from '@/store/chat/selectors'; import { ChatMessage } from '@/types/message'; @@ -14,19 +15,29 @@ export const AssistantMessage = memo< } >(({ id, tools, content, ...props }) => { const editing = useChatStore(chatSelectors.isMessageEditing(id)); + const generating = useChatStore(chatSelectors.isMessageGenerating(id)); + + const isToolCallGenerating = generating && (content === LOADING_FLAT || !content) && !!tools; return ( {(content || editing) && ( - + )} - {!editing && ( + {!editing && tools && ( - {tools?.map((toolCall) => ( + {tools.map((toolCall) => ( ))} diff --git a/src/features/Conversation/Messages/Default.tsx b/src/features/Conversation/Messages/Default.tsx index 6d0cbe33fa04..bc62fc16c7d1 100644 --- a/src/features/Conversation/Messages/Default.tsx +++ b/src/features/Conversation/Messages/Default.tsx @@ -8,8 +8,11 @@ import BubblesLoading from '../components/BubblesLoading'; export const DefaultMessage = memo< ChatMessage & { editableContent: ReactNode; + isToolCallGenerating?: boolean; } ->(({ id, editableContent, content }) => { +>(({ id, editableContent, content, isToolCallGenerating }) => { + if (isToolCallGenerating) return; + if (content === LOADING_FLAT) return ; return
{editableContent}
; diff --git a/src/features/Conversation/Messages/Function.tsx b/src/features/Conversation/Messages/Function.tsx deleted file mode 100644 index ba90dba5e5f8..000000000000 --- a/src/features/Conversation/Messages/Function.tsx +++ /dev/null @@ -1,35 +0,0 @@ -import isEqual from 'fast-deep-equal'; -import { memo, useState } from 'react'; -import { Flexbox } from 'react-layout-kit'; - -import { useChatStore } from '@/store/chat'; -import { chatSelectors } from '@/store/chat/selectors'; -import { ChatMessage } from '@/types/message'; - -import Inspector from '../Plugins/Inspector'; -import PluginRender from '../Plugins/Render'; - -export const FunctionMessage = memo(({ id, content, plugin }) => { - const fcProps = useChatStore( - chatSelectors.getFunctionMessageProps({ content, id, plugin }), - isEqual, - ); - - const [showRender, setShow] = useState(true); - - return ( - - - {showRender && ( - - )} - - ); -}); diff --git a/src/features/Conversation/Plugins/Inspector/PluginResultJSON.tsx b/src/features/Conversation/Messages/Tool/Inspector/PluginResultJSON.tsx similarity index 100% rename from src/features/Conversation/Plugins/Inspector/PluginResultJSON.tsx rename to src/features/Conversation/Messages/Tool/Inspector/PluginResultJSON.tsx diff --git a/src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx b/src/features/Conversation/Messages/Tool/Inspector/Settings.tsx similarity index 100% rename from src/features/Conversation/Messages/Assistant/ToolCalls/Settings.tsx rename to src/features/Conversation/Messages/Tool/Inspector/Settings.tsx diff --git a/src/features/Conversation/Plugins/Inspector/index.tsx b/src/features/Conversation/Messages/Tool/Inspector/index.tsx similarity index 73% rename from src/features/Conversation/Plugins/Inspector/index.tsx rename to src/features/Conversation/Messages/Tool/Inspector/index.tsx index 5eb9e5cc4bb3..9cf85dfcfbd5 100644 --- a/src/features/Conversation/Plugins/Inspector/index.tsx +++ b/src/features/Conversation/Messages/Tool/Inspector/index.tsx @@ -1,13 +1,12 @@ import { Loading3QuartersOutlined } from '@ant-design/icons'; -import { LobePluginType } from '@lobehub/chat-plugin-sdk'; -import { ActionIcon, Avatar, Highlighter, Icon } from '@lobehub/ui'; +import { ActionIcon, Avatar, Highlighter, Icon, Tag } from '@lobehub/ui'; import { Tabs } from 'antd'; import isEqual from 'fast-deep-equal'; import { LucideBug, LucideBugOff, LucideChevronDown, - LucideChevronUp, + LucideChevronRight, LucideToyBrick, } from 'lucide-react'; import { memo, useState } from 'react'; @@ -16,6 +15,7 @@ import { Flexbox } from 'react-layout-kit'; import { pluginHelpers, useToolStore } from '@/store/tool'; import { pluginSelectors, toolSelectors } from '@/store/tool/selectors'; +import { ChatPluginPayload } from '@/types/message'; import PluginResult from './PluginResultJSON'; import Settings from './Settings'; @@ -23,36 +23,34 @@ import { useStyles } from './style'; export interface InspectorProps { arguments?: string; - command?: any; content: string; - id?: string; + identifier?: string; loading?: boolean; + payload?: ChatPluginPayload; setShow?: (showRender: boolean) => void; showRender?: boolean; - type?: LobePluginType; } const Inspector = memo( ({ arguments: requestArgs = '{}', - command, + payload, showRender, loading, setShow, content, - id = 'unknown', - // type, + identifier = 'unknown', }) => { const { t } = useTranslation('plugin'); const { styles } = useStyles(); const [open, setOpen] = useState(false); - const pluginMeta = 
useToolStore(toolSelectors.getMetaById(id), isEqual); + const pluginMeta = useToolStore(toolSelectors.getMetaById(identifier), isEqual); - const showRightAction = useToolStore(pluginSelectors.isPluginHasUI(id)); + const showRightAction = useToolStore(pluginSelectors.isPluginHasUI(identifier)); const pluginAvatar = pluginHelpers.getPluginAvatar(pluginMeta); - const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('plugins.loading'); + const pluginTitle = pluginHelpers.getPluginTitle(pluginMeta) ?? t('unknownPlugin'); const avatar = pluginAvatar ? ( @@ -62,7 +60,7 @@ const Inspector = memo( let args, params; try { - args = JSON.stringify(command, null, 2); + args = JSON.stringify(payload, null, 2); params = JSON.stringify(JSON.parse(requestArgs), null, 2); } catch { args = ''; @@ -81,29 +79,30 @@ const Inspector = memo( setShow?.(!showRender); }} > - {loading ? ( -
- -
- ) : ( - avatar - )} - {pluginTitle} - {showRightAction && } -
- { - - {/*{type === 'standalone' && }*/} - { - setOpen(!open); - }} - title={t(open ? 'debug.off' : 'debug.on')} - /> - + + {loading ? ( +
+ +
+ ) : ( + avatar + )} +
{pluginTitle}
<Tag>{payload?.apiName}</Tag>
- } + {showRightAction && } +
+ + + { + setOpen(!open); + }} + title={t(open ? 'debug.off' : 'debug.on')} + /> + +
{open && ( (({ id, content, tool }) => { - const fcProps = useChatStore( - chatSelectors.getFunctionMessageProps({ content, id, plugin: tool }), - isEqual, - ); - - const [showRender, setShow] = useState(true); +export const ToolMessage = memo(({ id, content, plugin }) => { + const loading = useChatStore(chatSelectors.isMessageGenerating(id)); - if (content === LOADING_FLAT) return ; + const [showRender, setShow] = useState(plugin?.type !== 'default'); return ( - - {showRender && ( + + {showRender || loading ? ( + ) : ( + + {plugin?.arguments || ''} + )} ); diff --git a/src/features/Conversation/Messages/index.ts b/src/features/Conversation/Messages/index.ts index 7a0469eef0c9..2abb0fd138d8 100644 --- a/src/features/Conversation/Messages/index.ts +++ b/src/features/Conversation/Messages/index.ts @@ -6,14 +6,13 @@ import { sessionSelectors } from '@/store/session/selectors'; import { OnAvatarsClick, RenderMessage } from '../types'; import { AssistantMessage } from './Assistant'; import { DefaultMessage } from './Default'; -import { FunctionMessage } from './Function'; import { ToolMessage } from './Tool'; import { UserMessage } from './User'; export const renderMessages: Record = { assistant: AssistantMessage, default: DefaultMessage, - function: FunctionMessage, + function: DefaultMessage, tool: ToolMessage, user: UserMessage, }; diff --git a/src/features/Conversation/Plugins/Inspector/Settings.tsx b/src/features/Conversation/Plugins/Inspector/Settings.tsx deleted file mode 100644 index e9183dfe24cf..000000000000 --- a/src/features/Conversation/Plugins/Inspector/Settings.tsx +++ /dev/null @@ -1,39 +0,0 @@ -import { ActionIcon } from '@lobehub/ui'; -import { LucideSettings } from 'lucide-react'; -import { memo, useState } from 'react'; -import { useTranslation } from 'react-i18next'; - -import PluginDetailModal from '@/features/PluginDetailModal'; -import { pluginHelpers, useToolStore } from '@/store/tool'; -import { pluginSelectors } from '@/store/tool/selectors'; - -const Settings = memo<{ id: string }>(({ id }) => { - const item = useToolStore(pluginSelectors.getPluginManifestById(id)); - const [open, setOpen] = useState(false); - const { t } = useTranslation('plugin'); - const hasSettings = pluginHelpers.isSettingSchemaNonEmpty(item?.settings); - - return ( - hasSettings && ( - <> - { - setOpen(true); - }} - title={t('setting')} - /> - { - setOpen(false); - }} - open={open} - schema={item?.settings} - /> - - ) - ); -}); - -export default Settings; diff --git a/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx b/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx index 6f21a4a0d2bf..d2c9d86074c1 100644 --- a/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx +++ b/src/features/Conversation/Plugins/Render/StandaloneType/Iframe.tsx @@ -126,7 +126,7 @@ const IFrameRender = memo(({ url, id, payload, width = 600, h // we need to know which message to trigger if (messageId !== id) return; - triggerAIMessage(id); + triggerAIMessage({ parentId: id }); }); // when plugin want to create an assistant message diff --git a/src/features/Conversation/components/ChatItem/index.tsx b/src/features/Conversation/components/ChatItem/index.tsx index 89545ee06fea..30eecbf31835 100644 --- a/src/features/Conversation/components/ChatItem/index.tsx +++ b/src/features/Conversation/components/ChatItem/index.tsx @@ -56,10 +56,6 @@ const Item = memo(({ index, id }) => { }, isEqual); const historyLength = useChatStore((s) => chatSelectors.currentChats(s).length); - const 
[editing, toggleMessageEditing] = useChatStore((s) => [ - chatSelectors.isMessageEditing(id)(s), - s.toggleMessageEditing, - ]); const [isMessageLoading, generating, editing, toggleMessageEditing, updateMessageContent] = useChatStore((s) => [ diff --git a/src/features/Conversation/components/SkeletonList.tsx b/src/features/Conversation/components/SkeletonList.tsx index bf6058e9a39b..d3ec5e868d28 100644 --- a/src/features/Conversation/components/SkeletonList.tsx +++ b/src/features/Conversation/components/SkeletonList.tsx @@ -34,14 +34,14 @@ const SkeletonList = memo(({ mobile }) => { diff --git a/src/features/Conversation/index.tsx b/src/features/Conversation/index.tsx index 09d2c265ede3..fad20011729f 100644 --- a/src/features/Conversation/index.tsx +++ b/src/features/Conversation/index.tsx @@ -1,9 +1,8 @@ -import { Suspense, lazy } from 'react'; +import { Suspense } from 'react'; import { Flexbox } from 'react-layout-kit'; import SkeletonList from './components/SkeletonList'; - -const ChatList = lazy(() => import('./components/VirtualizedList')); +import ChatList from './components/VirtualizedList'; interface ConversationProps { mobile?: boolean; diff --git a/src/locales/default/plugin.ts b/src/locales/default/plugin.ts index cd8a58857183..65984041157d 100644 --- a/src/locales/default/plugin.ts +++ b/src/locales/default/plugin.ts @@ -131,10 +131,6 @@ export default { plugin: '插件运行中...', }, pluginList: '插件列表', - plugins: { - loading: '工具检测中...', - unknown: '未知工具', - }, setting: '插件设置', settings: { indexUrl: { @@ -166,4 +162,5 @@ export default { }, title: '插件商店', }, + unknownPlugin: '未知插件', }; diff --git a/src/services/__tests__/chat.test.ts b/src/services/__tests__/chat.test.ts index 15d0f9a665ad..b9f403533e2b 100644 --- a/src/services/__tests__/chat.test.ts +++ b/src/services/__tests__/chat.test.ts @@ -580,7 +580,7 @@ Get data from users`, body: JSON.stringify(expectedPayload), headers: expect.any(Object), method: 'POST', - signal: undefined, + signal: expect.any(AbortSignal), }); }); diff --git a/src/services/chat.ts b/src/services/chat.ts index 9af430a7f940..9db2af302bdf 100644 --- a/src/services/chat.ts +++ b/src/services/chat.ts @@ -21,10 +21,11 @@ import { userProfileSelectors, } from '@/store/user/selectors'; import { ChatErrorType } from '@/types/fetch'; -import { ChatMessage } from '@/types/message'; +import { ChatMessage, MessageToolCall } from '@/types/message'; import type { ChatStreamPayload, OpenAIChatMessage } from '@/types/openai/chat'; import { UserMessageContentPart } from '@/types/openai/chat'; import { FetchSSEOptions, fetchSSE, getMessageError } from '@/utils/fetch'; +import { genToolCallingName } from '@/utils/toolCall'; import { createTraceHeader, getTraceId } from '@/utils/trace'; import { createHeaderWithAuth, getProviderAuthPayload } from './_auth'; @@ -254,28 +255,33 @@ class ChatService { const enableFetchOnClient = modelConfigSelectors.isProviderFetchOnClient(provider)( useUserStore.getState(), ); - /** - * Notes: - * 1. Broswer agent runtime will skip auth check if a key and endpoint provided by - * user which will cause abuse of plugins services - * 2. 
This feature will disabled by default - */ + + let fetcher: typeof fetch | undefined = undefined; + if (enableFetchOnClient) { - try { - return await this.fetchOnClient({ payload, provider, signal }); - } catch (e) { - const { - errorType = ChatErrorType.BadRequest, - error: errorContent, - ...res - } = e as ChatCompletionErrorPayload; - - const error = errorContent || e; - // track the error at server side - console.error(`Route: [${provider}] ${errorType}:`, error); - - return createErrorResponse(errorType, { error, ...res, provider }); - } + /** + * Notes: + * 1. Browser agent runtime will skip auth check if a key and endpoint provided by + * user which will cause abuse of plugins services + * 2. This feature will be disabled by default + */ + fetcher = async () => { + try { + return await this.fetchOnClient({ payload, provider, signal }); + } catch (e) { + const { + errorType = ChatErrorType.BadRequest, + error: errorContent, + ...res + } = e as ChatCompletionErrorPayload; + + const error = errorContent || e; + // track the error at server side + console.error(`Route: [${provider}] ${errorType}:`, error); + + return createErrorResponse(errorType, { error, ...res, provider }); + } + }; } const traceHeader = createTraceHeader({ ...options?.trace }); @@ -287,6 +293,7 @@ class ChatService { return fetchSSE(API_ENDPOINTS.chat(provider), { body: JSON.stringify(payload), + fetcher: fetcher, headers, method: 'POST', onAbort: options?.onAbort, @@ -410,7 +417,20 @@ class ChatService { } case 'assistant': { - return { content: m.content, role: m.role, tool_calls: m.tool_calls }; + return { + content: m.content, + role: m.role, + tool_calls: m.tools?.map( + (tool): MessageToolCall => ({ + function: { + arguments: tool.arguments, + name: genToolCallingName(tool.identifier, tool.apiName, tool.type), + }, + id: tool.id, + type: tool.type, + }), + ), + }; } // TODO: need to be removed after upgrade @@ -422,9 +442,9 @@ class ChatService { case 'tool': { return { content: m.content, - name: m.tool_calls?.find((tool) => tool.id === m.tool?.id)?.function.name, + name: genToolCallingName(m.plugin!.identifier, m.plugin!.apiName, m.plugin?.type), role: m.role, - tool_call_id: m.tool?.id, + tool_call_id: m.tool_call_id, }; } diff --git a/src/store/chat/slices/message/action.test.ts b/src/store/chat/slices/message/action.test.ts index d6d6f634ac3d..3c76c23fbce3 100644 --- a/src/store/chat/slices/message/action.test.ts +++ b/src/store/chat/slices/message/action.test.ts @@ -19,6 +19,7 @@ vi.stubGlobal( vi.fn(() => Promise.resolve(new Response('mock'))), ); +vi.mock('zustand/traditional'); // Mock service vi.mock('@/services/message', () => ({ messageService: { @@ -47,12 +48,6 @@ vi.mock('@/services/chat', async (importOriginal) => { }; }); -vi.mock('@/store/chat/selectors', () => ({ - chatSelectors: { - currentChats: vi.fn(), - }, -})); - const realCoreProcessMessage = useChatStore.getState().internal_coreProcessMessage; const realRefreshMessages = useChatStore.getState().refreshMessages; // Mock state @@ -86,6 +81,9 @@ describe('chatMessage actions', () => { const messageId = 'message-id'; const deleteSpy = vi.spyOn(result.current, 'deleteMessage'); + act(() => { + useChatStore.setState({ messages: [{ id: messageId } as ChatMessage] }); + }); await act(async () => { await result.current.deleteMessage(messageId); }); @@ -259,11 +257,6 @@ describe('chatMessage actions', () => { enableAutoCreateTopic, })); - // Mock the currentChats selector to return a list that does not reach the threshold - 
(chatSelectors.currentChats as Mock).mockReturnValue( - Array.from({ length: autoCreateTopicThreshold + 1 }, (_, i) => ({ id: `msg-${i}` })), - ); - // Mock saveToTopic and switchTopic to simulate not being called const saveToTopicMock = vi.fn(); const switchTopicMock = vi.fn(); @@ -271,6 +264,10 @@ describe('chatMessage actions', () => { await act(async () => { useChatStore.setState({ ...mockState, + // Mock the currentChats selector to return a list that does not reach the threshold + messages: Array.from({ length: autoCreateTopicThreshold + 1 }, (_, i) => ({ + id: `msg-${i}`, + })) as any, activeTopicId: undefined, saveToTopic: saveToTopicMock, switchTopic: switchTopicMock, @@ -298,11 +295,6 @@ describe('chatMessage actions', () => { // Mock messageService.create to resolve with a message id (messageService.createMessage as Mock).mockResolvedValue('new-message-id'); - // Mock the currentChats selector to return a list that reaches the threshold - (chatSelectors.currentChats as Mock).mockReturnValue( - Array.from({ length: autoCreateTopicThreshold }, (_, i) => ({ id: `msg-${i}` })), - ); - // Mock saveToTopic to resolve with a topic id and switchTopic to switch to the new topic const saveToTopicMock = vi.fn(() => Promise.resolve('new-topic-id')); const switchTopicMock = vi.fn(); @@ -310,6 +302,9 @@ describe('chatMessage actions', () => { act(() => { useChatStore.setState({ ...mockState, + messages: Array.from({ length: autoCreateTopicThreshold }, (_, i) => ({ + id: `msg-${i}`, + })) as any, activeTopicId: undefined, saveToTopic: saveToTopicMock, switchTopic: switchTopicMock, @@ -339,11 +334,6 @@ describe('chatMessage actions', () => { enableAutoCreateTopic, })); - // Mock the currentChats selector to return a list that does not reach the threshold - (chatSelectors.currentChats as Mock).mockReturnValue( - Array.from({ length: autoCreateTopicThreshold - 1 }, (_, i) => ({ id: `msg-${i}` })), - ); - // Mock saveToTopic and switchTopic to simulate not being called const saveToTopicMock = vi.fn(); const switchTopicMock = vi.fn(); @@ -351,6 +341,10 @@ describe('chatMessage actions', () => { await act(async () => { useChatStore.setState({ ...mockState, + // Mock the currentChats selector to return a list that does not reach the threshold + messages: Array.from({ length: autoCreateTopicThreshold - 2 }, (_, i) => ({ + id: `msg-${i}`, + })) as any, activeTopicId: undefined, saveToTopic: saveToTopicMock, switchTopic: switchTopicMock, @@ -395,12 +389,14 @@ describe('chatMessage actions', () => { const { result } = renderHook(() => useChatStore()); const messageId = 'message-id'; - // Mock the currentChats selector to return a list that includes the message to be resent - (chatSelectors.currentChats as Mock).mockReturnValue([ - // ... other messages - { id: messageId, role: 'user', content: 'Resend this message' }, - // ... 
other messages - ]); + act(() => { + useChatStore.setState({ + // Mock the currentChats selector to return a list that includes the message to be resent + messages: [ + { id: messageId, role: 'user', content: 'Resend this message' } as ChatMessage, + ], + }); + }); // Mock the internal_coreProcessMessage function to resolve immediately mockState.internal_coreProcessMessage.mockResolvedValue(undefined); @@ -421,10 +417,12 @@ describe('chatMessage actions', () => { const { result } = renderHook(() => useChatStore()); const messageId = 'non-existing-message-id'; - // Mock the currentChats selector to return a list that does not include the message to be resent - (chatSelectors.currentChats as Mock).mockReturnValue([ - // ... other messages - ]); + act(() => { + useChatStore.setState({ + // Mock the currentChats selector to return a list that does not include the message to be resent + messages: [], + }); + }); await act(async () => { await result.current.internal_resendMessage(messageId); @@ -461,9 +459,8 @@ describe('chatMessage actions', () => { expect(internal_dispatchMessageSpy).toHaveBeenCalledWith({ id: messageId, - key: 'content', - type: 'updateMessage', - value: newContent, + type: 'updateMessages', + value: { content: newContent }, }); }); @@ -649,44 +646,6 @@ describe('chatMessage actions', () => { }); }); - it('should handle function call message at start of AI response', async () => { - const { result } = renderHook(() => useChatStore()); - const messages = [{ id: 'message-id', content: 'Hello', role: 'user' }] as ChatMessage[]; - const assistantMessageId = 'assistant-message-id'; - const aiResponse = - '{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]}'; - - // Mock fetch to resolve with AI response containing function call - vi.mocked(fetch).mockResolvedValueOnce(new Response(aiResponse)); - - await act(async () => { - const response = await result.current.internal_fetchAIChatMessage( - messages, - assistantMessageId, - ); - expect(response.isFunctionCall).toEqual(true); - }); - }); - - it('should handle function message at end of AI response', async () => { - const { result } = renderHook(() => useChatStore()); - const messages = [{ id: 'message-id', content: 'Hello', role: 'user' }] as ChatMessage[]; - const assistantMessageId = 'assistant-message-id'; - const aiResponse = - 'Hello, human! 
{"tool_calls":[{"id":"call_sbca","type":"function","function":{"name":"pluginName____apiName","arguments":{"key":"value"}}}]}'; - - // Mock fetch to resolve with AI response containing function call at end - vi.mocked(fetch).mockResolvedValue(new Response(aiResponse)); - - await act(async () => { - const response = await result.current.internal_fetchAIChatMessage( - messages, - assistantMessageId, - ); - expect(response.isFunctionCall).toEqual(true); - }); - }); - it('should handle errors during AI response fetching', async () => { const { result } = renderHook(() => useChatStore()); const messages = [{ id: 'message-id', content: 'Hello', role: 'user' }] as ChatMessage[]; @@ -697,9 +656,11 @@ describe('chatMessage actions', () => { vi.mocked(fetch).mockRejectedValue(new Error(errorMessage)); await act(async () => { - await expect( - result.current.internal_fetchAIChatMessage(messages, assistantMessageId), - ).rejects.toThrow(errorMessage); + expect( + await result.current.internal_fetchAIChatMessage(messages, assistantMessageId), + ).toEqual({ + isFunctionCall: false, + }); }); }); }); diff --git a/src/store/chat/slices/message/action.ts b/src/store/chat/slices/message/action.ts index 1409db34a38d..1bae1ab27fef 100644 --- a/src/store/chat/slices/message/action.ts +++ b/src/store/chat/slices/message/action.ts @@ -157,8 +157,29 @@ export const chatMessage: StateCreator< ChatMessageAction > = (set, get) => ({ deleteMessage: async (id) => { - get().internal_dispatchMessage({ type: 'deleteMessage', id }); - await messageService.removeMessage(id); + const message = chatSelectors.getMessageById(id)(get()); + if (!message) return; + + const deleteFn = async (id: string) => { + get().internal_dispatchMessage({ type: 'deleteMessage', id }); + await messageService.removeMessage(id); + }; + + // if the message is a tool calls, then delete all the related messages + // TODO: maybe we need to delete it in the DB? + if (message.tools) { + const pools = message.tools + .flatMap((tool) => { + const messages = get().messages.filter((m) => m.tool_call_id === tool.id); + + return messages.map((m) => m.id); + }) + .map((i) => deleteFn(i)); + + await Promise.all(pools); + } + + await deleteFn(id); await get().refreshMessages(); }, delAndRegenerateMessage: async (id) => { @@ -470,10 +491,7 @@ export const chatMessage: StateCreator< internal_dispatchMessage({ id: assistantId, type: 'updateMessages', - value: { - tool_calls: chunk.tool_calls, - tools: get().internal_transformToolCalls(chunk.tool_calls), - }, + value: { tools: get().internal_transformToolCalls(chunk.tool_calls) }, }); isFunctionCall = true; } @@ -582,7 +600,7 @@ export const chatMessage: StateCreator< internal_dispatchMessage({ id, type: 'updateMessages', - value: { tool_calls: toolCalls, tools: internal_transformToolCalls(toolCalls) }, + value: { tools: internal_transformToolCalls(toolCalls) }, }); } else { internal_dispatchMessage({ id, type: 'updateMessages', value: { content } }); @@ -590,7 +608,6 @@ export const chatMessage: StateCreator< await messageService.updateMessage(id, { content, - tool_calls: toolCalls, tools: toolCalls ? 
internal_transformToolCalls(toolCalls) : undefined, }); await refreshMessages(); diff --git a/src/store/chat/slices/message/selectors.test.ts b/src/store/chat/slices/message/selectors.test.ts index 989f6a80afc2..a3019500d826 100644 --- a/src/store/chat/slices/message/selectors.test.ts +++ b/src/store/chat/slices/message/selectors.test.ts @@ -35,12 +35,16 @@ const mockMessages = [ { id: 'msg3', content: 'Function Message', - role: 'function', - plugin: { - arguments: ['arg1', 'arg2'], - identifier: 'func1', - type: 'pluginType', - }, + role: 'tool', + tools: [ + { + arguments: ['arg1', 'arg2'], + identifier: 'func1', + apiName: 'ttt', + type: 'pluginType', + id: 'abc', + }, + ], }, ] as ChatMessage[]; @@ -64,16 +68,22 @@ const mockedChats = [ { id: 'msg3', content: 'Function Message', - role: 'function', + role: 'tool', meta: { - avatar: '🧩', - title: 'plugin-unknown', - }, - plugin: { - arguments: ['arg1', 'arg2'], - identifier: 'func1', - type: 'pluginType', + avatar: '🤯', + backgroundColor: 'rgba(0,0,0,0)', + description: 'inbox.desc', + title: 'inbox.title', }, + tools: [ + { + arguments: ['arg1', 'arg2'], + identifier: 'func1', + apiName: 'ttt', + type: 'pluginType', + id: 'abc', + }, + ], }, ] as ChatMessage[]; @@ -103,52 +113,6 @@ describe('chatSelectors', () => { }); }); - describe('getFunctionMessageProps', () => { - it('should return the properties of a function message', () => { - const state = merge(initialStore, { - messages: mockMessages, - chatLoadingIds: ['msg3'], // Assuming this id represents a loading state - }); - const props = chatSelectors.getFunctionMessageProps(mockMessages[2])(state); - expect(props).toEqual({ - arguments: ['arg1', 'arg2'], - command: mockMessages[2].plugin, - content: 'Function Message', - id: 'func1', - loading: true, - type: 'pluginType', - }); - }); - - it('should return loading as false if the message id is not the current loading id', () => { - const state = merge(initialStore, { messages: mockMessages, chatLoadingId: 'msg1' }); - const props = chatSelectors.getFunctionMessageProps(mockMessages[2])(state); - expect(props.loading).toBe(false); - }); - - it('should return correct properties when no plugin is present', () => { - const messageWithoutPlugin = { - id: 'msg4', - content: 'No Plugin Message', - role: 'function', - // No plugin property - }; - const state = merge(initialStore, { - messages: [...mockMessages, messageWithoutPlugin], - chatLoadingId: 'msg1', - }); - const props = chatSelectors.getFunctionMessageProps(messageWithoutPlugin)(state); - expect(props).toEqual({ - arguments: undefined, - command: undefined, - content: 'No Plugin Message', - id: undefined, - loading: false, - type: undefined, - }); - }); - }); - describe('currentChatsWithHistoryConfig', () => { it('should slice the messages according to the current agent config', () => { const state = merge(initialStore, { messages: mockMessages }); @@ -185,16 +149,22 @@ describe('chatSelectors', () => { { id: 'msg3', content: 'Function Message', - role: 'function', + role: 'tool', meta: { - avatar: '🧩', - title: 'plugin-unknown', - }, - plugin: { - arguments: ['arg1', 'arg2'], - identifier: 'func1', - type: 'pluginType', + avatar: '🤯', + backgroundColor: 'rgba(0,0,0,0)', + description: 'inbox.desc', + title: 'inbox.title', }, + tools: [ + { + apiName: 'ttt', + arguments: ['arg1', 'arg2'], + identifier: 'func1', + id: 'abc', + type: 'pluginType', + }, + ], }, ]); }); diff --git a/src/store/chat/slices/message/selectors.ts b/src/store/chat/slices/message/selectors.ts index 
20bb131b8c13..44b829efeddd 100644 --- a/src/store/chat/slices/message/selectors.ts +++ b/src/store/chat/slices/message/selectors.ts @@ -1,4 +1,3 @@ -import { LobePluginType } from '@lobehub/chat-plugin-sdk'; import { t } from 'i18next'; import { DEFAULT_INBOX_AVATAR, DEFAULT_USER_AVATAR } from '@/const/meta'; @@ -108,17 +107,6 @@ const chatsMessageString = (s: ChatStore): string => { return chats.map((m) => m.content).join(''); }; -const getFunctionMessageProps = - ({ plugin, content, id }: Pick) => - (s: ChatStore) => ({ - arguments: plugin?.arguments, - command: plugin, - content, - id: plugin?.identifier, - loading: s.chatLoadingIds.includes(id), - type: plugin?.type as LobePluginType, - }); - const getMessageById = (id: string) => (s: ChatStore) => chatHelpers.getMessageById(s.messages, id); const getTraceIdByMessageId = (id: string) => (s: ChatStore) => getMessageById(id)(s)?.traceId; @@ -139,7 +127,6 @@ export const chatSelectors = { currentChats, currentChatsWithGuideMessage, currentChatsWithHistoryConfig, - getFunctionMessageProps, getMessageById, getTraceIdByMessageId, isAIGenerating, diff --git a/src/store/chat/slices/plugin/action.test.ts b/src/store/chat/slices/plugin/action.test.ts index 35c6d3cbc26d..627bf70286bb 100644 --- a/src/store/chat/slices/plugin/action.test.ts +++ b/src/store/chat/slices/plugin/action.test.ts @@ -8,11 +8,13 @@ import { messageService } from '@/services/message'; import { chatSelectors } from '@/store/chat/selectors'; import { useChatStore } from '@/store/chat/store'; import { useToolStore } from '@/store/tool'; -import { ChatToolPayload } from '@/types/message'; +import { ChatMessage, ChatToolPayload } from '@/types/message'; import { LobeTool } from '@/types/tool'; const invokeStandaloneTypePlugin = useChatStore.getState().invokeStandaloneTypePlugin; +vi.mock('zustand/traditional'); + // Mock messageService vi.mock('@/services/message', () => ({ messageService: { @@ -73,6 +75,7 @@ describe('ChatPluginAction', () => { const initialState = { messages: [], coreProcessMessage: vi.fn(), + internal_coreProcessMessage: vi.fn(), refreshMessages: vi.fn(), }; useChatStore.setState(initialState); @@ -130,7 +133,6 @@ describe('ChatPluginAction', () => { content: pluginApiResponse, }); expect(storeState.refreshMessages).toHaveBeenCalled(); - expect(storeState.triggerAIMessage).toHaveBeenCalled(); expect(storeState.internal_toggleChatLoading).toHaveBeenCalledWith( false, 'message-id', @@ -172,7 +174,7 @@ describe('ChatPluginAction', () => { }); }); - describe('triggerToolCalls', () => { + describe.skip('triggerToolCalls', () => { it('should trigger a function call and update the plugin message accordingly', async () => { const messageId = 'message-id'; const messageContent = JSON.stringify({ @@ -196,21 +198,32 @@ describe('ChatPluginAction', () => { const refreshSpy = vi.spyOn(useChatStore.getState(), 'refreshMessages'); const invokeSpy = vi.spyOn(useChatStore.getState(), 'invokeDefaultTypePlugin'); - vi.spyOn(chatSelectors, 'getMessageById').mockImplementationOnce( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); const { result } = renderHook(() => useChatStore()); await act(async () => { + useChatStore.setState({ + runPluginApi: vi.fn(), + messages: [ + { + id: messageId, + content: messageContent, + tools: [ + { + id: 'call_sbca', + type: 'default', + identifier: 'pluginName', + apiName: 'apiName', + arguments: "{ key: 'value' }", + }, + ], + } as ChatMessage, + ], + }); + await result.current.triggerToolCalls(messageId); }); - 
expect(chatSelectors.getMessageById).toHaveBeenCalledWith(messageId); expect(messageService.updateMessage).toHaveBeenCalledWith(messageId, { content: '', plugin: messagePluginPayload, diff --git a/src/store/chat/slices/plugin/action.ts b/src/store/chat/slices/plugin/action.ts index f928d5b6a2f4..6b5a57a879f2 100644 --- a/src/store/chat/slices/plugin/action.ts +++ b/src/store/chat/slices/plugin/action.ts @@ -30,7 +30,7 @@ export interface ChatPluginAction { invokeMarkdownTypePlugin: (id: string, payload: ChatToolPayload) => Promise; invokeStandaloneTypePlugin: (id: string, payload: ChatToolPayload) => Promise; runPluginApi: (id: string, payload: ChatToolPayload) => Promise; - triggerAIMessage: (id: string, traceId?: string) => Promise; + triggerAIMessage: (params: { parentId?: string; traceId?: string }) => Promise; triggerToolCalls: (id: string) => Promise; updatePluginState: (id: string, key: string, value: any) => Promise; @@ -60,36 +60,42 @@ export const chatPlugin: StateCreator< await internal_updateMessageContent(id, content); - if (triggerAiMessage) await triggerAIMessage(id); + if (triggerAiMessage) await triggerAIMessage({ parentId: id }); }, internal_transformToolCalls: (toolCalls) => { - return toolCalls.map((toolCall) => { - let payload: ChatToolPayload; - - const [identifier, apiName, type] = toolCall.function.name.split(PLUGIN_SCHEMA_SEPARATOR); - - payload = { - apiName, - arguments: toolCall.function.arguments, - id: toolCall.id, - identifier, - type: (type ?? 'default') as any, - }; - - // if the apiName is md5, try to find the correct apiName in the plugins - if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { - const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); - const manifest = pluginSelectors.getPluginManifestById(identifier)(useToolStore.getState()); - - const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); - if (api) { - payload.apiName = api.name; + return toolCalls + .map((toolCall): ChatToolPayload | null => { + let payload: ChatToolPayload; + + const [identifier, apiName, type] = toolCall.function.name.split(PLUGIN_SCHEMA_SEPARATOR); + + if (!apiName) return null; + + payload = { + apiName, + arguments: toolCall.function.arguments, + id: toolCall.id, + identifier, + type: (type ?? 'default') as any, + }; + + // if the apiName is md5, try to find the correct apiName in the plugins + if (apiName.startsWith(PLUGIN_SCHEMA_API_MD5_PREFIX)) { + const md5 = apiName.replace(PLUGIN_SCHEMA_API_MD5_PREFIX, ''); + const manifest = pluginSelectors.getPluginManifestById(identifier)( + useToolStore.getState(), + ); + + const api = manifest?.api.find((api) => Md5.hashStr(api.name).toString() === md5); + if (api) { + payload.apiName = api.name; + } } - } - return payload; - }); + return payload; + }) + .filter(Boolean) as ChatToolPayload[]; }, invokeBuiltinTool: async (id, payload) => { @@ -205,10 +211,10 @@ export const chatPlugin: StateCreator< return data; }, - triggerAIMessage: async (id, traceId) => { + triggerAIMessage: async ({ parentId, traceId }) => { const { internal_coreProcessMessage } = get(); const chats = chatSelectors.currentChats(get()); - await internal_coreProcessMessage(chats, id, { traceId }); + await internal_coreProcessMessage(chats, parentId ?? 
chats.at(-1)!.id, { traceId }); }, triggerToolCalls: async (assistantId) => { @@ -229,9 +235,10 @@ export const chatPlugin: StateCreator< const toolMessage: CreateMessageParams = { content: LOADING_FLAT, parentId: assistantId, + plugin: payload, role: 'tool', sessionId: get().activeId, - tool: payload, + tool_call_id: payload.id, topicId: get().activeTopicId, // if there is activeTopicId,then add it to topicId }; @@ -270,7 +277,7 @@ export const chatPlugin: StateCreator< const traceId = chatSelectors.getTraceIdByMessageId(latestToolId)(get()); - await triggerAIMessage(latestToolId, traceId); + await triggerAIMessage({ traceId }); }, updatePluginState: async (id, key, value) => { diff --git a/src/store/chat/slices/topic/action.ts b/src/store/chat/slices/topic/action.ts index 3c907705a1c5..9ddf1dce9660 100644 --- a/src/store/chat/slices/topic/action.ts +++ b/src/store/chat/slices/topic/action.ts @@ -134,8 +134,13 @@ export const chatTopic: StateCreator< onLoadingChange: (loading) => { internal_updateTopicLoading(topicId, loading); }, - onMessageHandle: (x) => { - output += x; + onMessageHandle: (chunk) => { + switch (chunk.type) { + case 'text': { + output += chunk.text; + } + } + updateTopicTitleInSummary(topicId, output); }, params: await chainSummaryTitle(messages), diff --git a/src/store/tool/selectors/tool.ts b/src/store/tool/selectors/tool.ts index b300547e8a70..bd4ae0f6e4a3 100644 --- a/src/store/tool/selectors/tool.ts +++ b/src/store/tool/selectors/tool.ts @@ -1,35 +1,16 @@ import { LobeChatPluginManifest } from '@lobehub/chat-plugin-sdk'; import { uniqBy } from 'lodash-es'; -import { Md5 } from 'ts-md5'; -import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; import { MetaData } from '@/types/meta'; import { ChatCompletionTool } from '@/types/openai/chat'; import { LobeToolMeta } from '@/types/tool/tool'; +import { genToolCallingName } from '@/utils/toolCall'; import { pluginHelpers } from '../helpers'; import { ToolStoreState } from '../initialState'; import { builtinToolSelectors } from '../slices/builtin/selectors'; import { pluginSelectors } from '../slices/plugin/selectors'; -const getAPIName = (identifier: string, name: string, type?: string) => { - const pluginType = type && type !== 'default' ? 
`${PLUGIN_SCHEMA_SEPARATOR + type}` : ''; - - // 将插件的 identifier 作为前缀,避免重复 - let apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + name + pluginType; - - // OpenAI GPT function_call name can't be longer than 64 characters - // So we need to use md5 to shorten the name - // and then find the correct apiName in response by md5 - if (apiName.length >= 64) { - const md5Content = PLUGIN_SCHEMA_API_MD5_PREFIX + Md5.hashStr(name).toString(); - - apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + md5Content + pluginType; - } - - return apiName; -}; - const enabledSchema = (tools: string[] = []) => (s: ToolStoreState): ChatCompletionTool[] => { @@ -41,7 +22,7 @@ const enabledSchema = .flatMap((manifest) => manifest.api.map((m) => ({ description: m.description, - name: getAPIName(manifest.identifier, m.name, manifest.type), + name: genToolCallingName(manifest.identifier, m.name, manifest.type), parameters: m.parameters, })), ); @@ -67,9 +48,10 @@ const enabledSystemRoles = const methods = manifest.api .map((m) => - [`#### ${getAPIName(manifest.identifier, m.name, manifest.type)}`, m.description].join( - '\n\n', - ), + [ + `#### ${genToolCallingName(manifest.identifier, m.name, manifest.type)}`, + m.description, + ].join('\n\n'), ) .join('\n\n'); diff --git a/src/types/message/index.ts b/src/types/message/index.ts index c0bed2b61f99..5f18ed5b2af0 100644 --- a/src/types/message/index.ts +++ b/src/types/message/index.ts @@ -5,7 +5,7 @@ import { ErrorType } from '@/types/fetch'; import { LLMRoleType } from '../llm'; import { BaseDataModel } from '../meta'; -import { ChatPluginPayload, ChatToolPayload, MessageToolCall } from './tools'; +import { ChatPluginPayload, ChatToolPayload } from './tools'; import { Translate } from './translate'; /** @@ -55,9 +55,7 @@ export interface ChatMessage extends BaseDataModel { * parent message id */ parentId?: string; - /** - * @deprecated - */ + plugin?: ChatPluginPayload; pluginState?: any; @@ -71,8 +69,7 @@ export interface ChatMessage extends BaseDataModel { role: LLMRoleType; sessionId?: string; - tool?: ChatToolPayload; - tool_calls?: MessageToolCall[]; + tool_call_id?: string; tools?: ChatToolPayload[]; /** diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts index 5ffc062cc990..d89cae89a9c9 100644 --- a/src/utils/fetch.ts +++ b/src/utils/fetch.ts @@ -56,6 +56,7 @@ interface MessageToolCallsChunk { } export interface FetchSSEOptions { + fetcher?: typeof fetch; onAbort?: (text: string) => Promise; onErrorHandle?: (error: ChatMessageError) => void; onFinish?: OnFinishHandler; @@ -86,6 +87,7 @@ export const parseToolCalls = (origin: MessageToolCall[], value: MessageToolCall export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptions = {}) => { let output = ''; let toolCalls: undefined | MessageToolCall[]; + let triggerOnMessageHandler = false; let finishedType: SSEFinishType = 'done'; let response!: Response; @@ -93,6 +95,7 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio try { await fetchEventSource(url, { body: options.body, + fetch: options?.fetcher, headers: options.headers as Record, method: options.method, onerror: (error) => { @@ -107,7 +110,16 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio // options.onErrorHandle() }, onmessage: (ev) => { - const data = JSON.parse(ev.data); + triggerOnMessageHandler = true; + let data; + try { + data = JSON.parse(ev.data); + } catch (e) { + console.warn('parse error, fallback to stream', e); + options.onMessageHandle?.({ text: data, 
type: 'text' }); + return; + } + switch (ev.event) { case 'text': { output += data; @@ -145,9 +157,18 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio }); } catch {} - const traceId = response.headers.get(LOBE_CHAT_TRACE_ID); - const observationId = response.headers.get(LOBE_CHAT_OBSERVATION_ID); - await options?.onFinish?.(output, { observationId, toolCalls, traceId, type: finishedType }); + // only call onFinish when response is available + // so like abort, we don't need to call onFinish + if (response) { + // if there is no onMessageHandler, we should call onHandleMessage first + if (!triggerOnMessageHandler) { + options.onMessageHandle?.({ text: await response.clone().text(), type: 'text' }); + } + + const traceId = response.headers.get(LOBE_CHAT_TRACE_ID); + const observationId = response.headers.get(LOBE_CHAT_OBSERVATION_ID); + await options?.onFinish?.(output, { observationId, toolCalls, traceId, type: finishedType }); + } return response; }; diff --git a/src/utils/toolCall.ts b/src/utils/toolCall.ts new file mode 100644 index 000000000000..59046f697992 --- /dev/null +++ b/src/utils/toolCall.ts @@ -0,0 +1,21 @@ +import { Md5 } from 'ts-md5'; + +import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; + +export const genToolCallingName = (identifier: string, name: string, type?: string) => { + const pluginType = type && type !== 'default' ? `${PLUGIN_SCHEMA_SEPARATOR + type}` : ''; + + // 将插件的 identifier 作为前缀,避免重复 + let apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + name + pluginType; + + // OpenAI GPT function_call name can't be longer than 64 characters + // So we need to use md5 to shorten the name + // and then find the correct apiName in response by md5 + if (apiName.length >= 64) { + const md5Content = PLUGIN_SCHEMA_API_MD5_PREFIX + Md5.hashStr(name).toString(); + + apiName = identifier + PLUGIN_SCHEMA_SEPARATOR + md5Content + pluginType; + } + + return apiName; +}; From 5d1ed62758d15a92d1b036a617a14d9da66b8fd6 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 03:18:52 +0000 Subject: [PATCH 03/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20zhipu=20GLM?= =?UTF-8?q?=20tool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/zhipu.ts | 8 ++------ src/libs/agent-runtime/zhipu/index.ts | 5 +++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/config/modelProviders/zhipu.ts b/src/config/modelProviders/zhipu.ts index 55a51950fbfe..19d400454b2f 100644 --- a/src/config/modelProviders/zhipu.ts +++ b/src/config/modelProviders/zhipu.ts @@ -1,9 +1,5 @@ import { ModelProviderCard } from '@/types/llm'; -// TODO: 等待 ZhiPu 修复 API 问题后开启 functionCall -// 暂时不透出 GLM 系列的 function_call 功能 -// refs https://github.com/lobehub/lobe-chat/discussions/737#discussioncomment-8315815 - // ref https://open.bigmodel.cn/dev/howuse/model const ZhiPu: ModelProviderCard = { chatModels: [ @@ -11,7 +7,7 @@ const ZhiPu: ModelProviderCard = { description: '最新的 GLM-4 、最大支持 128k 上下文、支持 Function Call 、Retreival', displayName: 'GLM-4', enabled: true, - // functionCall: true, + functionCall: true, id: 'glm-4', tokens: 128_000, }, @@ -28,7 +24,7 @@ const ZhiPu: ModelProviderCard = { description: '最新的glm-3-turbo、最大支持 128k上下文、支持Function Call、Retreival', displayName: 'GLM-3 Turbo', enabled: true, - // functionCall: true, + functionCall: true, id: 'glm-3-turbo', tokens: 128_000, }, diff --git a/src/libs/agent-runtime/zhipu/index.ts 
b/src/libs/agent-runtime/zhipu/index.ts index 325de0b04e0b..384a50fff358 100644 --- a/src/libs/agent-runtime/zhipu/index.ts +++ b/src/libs/agent-runtime/zhipu/index.ts @@ -1,4 +1,3 @@ -import { OpenAIStream, StreamingTextResponse } from 'ai'; import OpenAI, { ClientOptions } from 'openai'; import { LobeRuntimeAI } from '../BaseAI'; @@ -13,6 +12,8 @@ import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; import { desensitizeUrl } from '../utils/desensitizeUrl'; import { handleOpenAIError } from '../utils/handleOpenAIError'; +import { StreamingResponse } from '../utils/response'; +import { OpenAIStream } from '../utils/streams'; import { parseDataUri } from '../utils/uriParser'; import { generateApiToken } from './authToken'; @@ -63,7 +64,7 @@ export class LobeZhipuAI implements LobeRuntimeAI { debugStream(debug.toReadableStream()).catch(console.error); } - return new StreamingTextResponse(OpenAIStream(prod, options?.callback), { + return StreamingResponse(OpenAIStream(prod, options?.callback), { headers: options?.headers, }); } catch (error) { From 326102e66e68a14ac4cd853af1436eeeb96b7024 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 04:16:09 +0000 Subject: [PATCH 04/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20moonshot=20?= =?UTF-8?q?function=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/moonshot.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/config/modelProviders/moonshot.ts b/src/config/modelProviders/moonshot.ts index 9c4916ab7c20..f5a63d4eefb2 100644 --- a/src/config/modelProviders/moonshot.ts +++ b/src/config/modelProviders/moonshot.ts @@ -6,18 +6,21 @@ const Moonshot: ModelProviderCard = { { displayName: 'Moonshot V1 8K', enabled: true, + functionCall: true, id: 'moonshot-v1-8k', tokens: 8192, }, { displayName: 'Moonshot V1 32K', enabled: true, + functionCall: true, id: 'moonshot-v1-32k', tokens: 32_768, }, { displayName: 'Moonshot V1 128K', enabled: true, + functionCall: true, id: 'moonshot-v1-128k', tokens: 128_000, }, From 6cf086c9a32c0e704adc78a8007e29d4a0f3fd8b Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 04:31:40 +0000 Subject: [PATCH 05/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20groq=20tool?= =?UTF-8?q?=20callings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/groq.ts | 6 +- src/libs/agent-runtime/groq/index.ts | 7 ++ .../utils/openaiCompatibleFactory/index.ts | 73 ++++++++++++++++++- 3 files changed, 82 insertions(+), 4 deletions(-) diff --git a/src/config/modelProviders/groq.ts b/src/config/modelProviders/groq.ts index 2a3a71b5dc3a..12f112946d46 100644 --- a/src/config/modelProviders/groq.ts +++ b/src/config/modelProviders/groq.ts @@ -6,24 +6,28 @@ const Groq: ModelProviderCard = { { displayName: 'LLaMA3-3-70B', enabled: true, + functionCall: true, id: 'llama3-70b-8192', tokens: 8192, }, { - displayName: 'Mixtral-8x7b-Instruct-v0.1', + displayName: 'Mixtral-8x7b', enabled: true, + functionCall: true, id: 'mixtral-8x7b-32768', tokens: 32_768, }, { displayName: 'Gemma-7b-it', enabled: true, + functionCall: true, id: 'gemma-7b-it', tokens: 8192, }, { displayName: 'LLaMA3-3-8B', enabled: true, + functionCall: true, id: 'llama3-8b-8192', tokens: 8192, }, diff --git a/src/libs/agent-runtime/groq/index.ts b/src/libs/agent-runtime/groq/index.ts index f30c1a9c159d..0774f0f8da6e 100644 --- a/src/libs/agent-runtime/groq/index.ts 
+++ b/src/libs/agent-runtime/groq/index.ts @@ -10,6 +10,13 @@ export const LobeGroq = LobeOpenAICompatibleFactory({ if (error.status === 403) return { error, errorType: AgentRuntimeErrorType.LocationNotSupportError }; }, + handlePayload: (payload) => { + return { + ...payload, + // disable stream for tools due to groq dont support + stream: !payload.tools, + } as any; + }, }, debug: { chatCompletion: () => process.env.DEBUG_GROQ_CHAT_COMPLETION === '1', diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts index 1c58bf5b1723..dda87d6ffab3 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts @@ -81,13 +81,26 @@ export const LobeOpenAICompatibleFactory = ({ signal: options?.signal, }); - const [prod, useForDebug] = response.tee(); + if (postPayload.stream) { + const [prod, useForDebug] = response.tee(); + + if (debug?.chatCompletion?.()) { + debugStream(useForDebug.toReadableStream()).catch(console.error); + } + + return StreamingResponse(OpenAIStream(prod, options?.callback), { + headers: options?.headers, + }); + } if (debug?.chatCompletion?.()) { - debugStream(useForDebug.toReadableStream()).catch(console.error); + console.log('\n[no stream response]\n'); + console.log(JSON.stringify(response) + '\n'); } - return StreamingResponse(OpenAIStream(prod, options?.callback), { + const stream = this.transformResponseToStream(response as unknown as OpenAI.ChatCompletion); + + return StreamingResponse(OpenAIStream(stream, options?.callback), { headers: options?.headers, }); } catch (error) { @@ -161,4 +174,58 @@ export const LobeOpenAICompatibleFactory = ({ .filter(Boolean) as ChatModelCard[]; } + + /** + * make the OpenAI response data as a stream + * @private + */ + private transformResponseToStream(data: OpenAI.ChatCompletion) { + return new ReadableStream({ + start(controller) { + const chunk: OpenAI.ChatCompletionChunk = { + choices: data.choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ + delta: { + content: choice.message.content, + role: choice.message.role, + tool_calls: choice.message.tool_calls?.map( + (tool, index): OpenAI.ChatCompletionChunk.Choice.Delta.ToolCall => ({ + function: tool.function, + id: tool.id, + index, + type: tool.type, + }), + ), + }, + finish_reason: null, + index: choice.index, + logprobs: choice.logprobs, + })), + created: data.created, + id: data.id, + model: data.model, + object: 'chat.completion.chunk', + }; + + controller.enqueue(chunk); + + controller.enqueue({ + choices: data.choices.map((choice: OpenAI.ChatCompletion.Choice) => ({ + delta: { + content: choice.message.content, + role: choice.message.role, + }, + finish_reason: choice.finish_reason, + index: choice.index, + logprobs: choice.logprobs, + })), + created: data.created, + id: data.id, + model: data.model, + object: 'chat.completion.chunk', + system_fingerprint: data.system_fingerprint, + } as OpenAI.ChatCompletionChunk); + controller.close(); + }, + }); + } }; From dde5582d1c247914e547c771981a365a49b78dbb Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 05:57:06 +0000 Subject: [PATCH 06/24] =?UTF-8?q?=E2=9C=A8=20feat:=20azure=20openai=20stre?= =?UTF-8?q?am=20tool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/azureOpenai/index.ts | 11 ++++++----- src/libs/agent-runtime/utils/streams/openai.ts | 7 ++++--- 2 files changed, 10 
insertions(+), 8 deletions(-) diff --git a/src/libs/agent-runtime/azureOpenai/index.ts b/src/libs/agent-runtime/azureOpenai/index.ts index 479ddcd409ca..113245bcbbe3 100644 --- a/src/libs/agent-runtime/azureOpenai/index.ts +++ b/src/libs/agent-runtime/azureOpenai/index.ts @@ -4,13 +4,14 @@ import { GetChatCompletionsOptions, OpenAIClient, } from '@azure/openai'; -import { OpenAIStream, StreamingTextResponse } from 'ai'; import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType } from '../error'; import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types'; import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { OpenAIStream } from '../utils/streams'; export class LobeAzureOpenAI implements LobeRuntimeAI { client: OpenAIClient; @@ -40,15 +41,15 @@ export class LobeAzureOpenAI implements LobeRuntimeAI { { ...params, abortSignal: options?.signal, maxTokens } as GetChatCompletionsOptions, ); - const stream = OpenAIStream(response as any); - - const [debug, prod] = stream.tee(); + const [debug, prod] = response.tee(); if (process.env.DEBUG_AZURE_CHAT_COMPLETION === '1') { debugStream(debug).catch(console.error); } - return new StreamingTextResponse(prod); + return StreamingResponse(OpenAIStream(prod, options?.callback), { + headers: options?.headers, + }); } catch (e) { let error = e as { [key: string]: any; code: string; message: string }; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index dd8010fffe36..803e2b01578b 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -6,11 +6,12 @@ import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './protocol'; export const OpenAIStream = ( - stream: Stream, + stream: Stream | ReadableStream, callbacks?: ChatStreamCallbacks, ) => { - return stream - .toReadableStream() + const readableStream = stream instanceof ReadableStream ? 
stream : stream.toReadableStream(); + + return readableStream .pipeThrough( new TransformStream({ transform: (chunk, controller) => { From ad30764fd48f48b61f7f64d63d87a5a532f47990 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 8 May 2024 09:18:25 +0000 Subject: [PATCH 07/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20minimax=20t?= =?UTF-8?q?ool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/minimax.ts | 17 ++++--- src/config/server/provider.ts | 2 +- src/libs/agent-runtime/minimax/index.ts | 51 +++++-------------- src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/minimax.ts | 39 ++++++++++++++ .../agent-runtime/utils/streams/openai.ts | 11 +++- .../agent-runtime/utils/streams/protocol.ts | 47 ++++++++++------- 7 files changed, 103 insertions(+), 65 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/minimax.ts diff --git a/src/config/modelProviders/minimax.ts b/src/config/modelProviders/minimax.ts index f3cd3a60b6bc..2fbfb0941d8e 100644 --- a/src/config/modelProviders/minimax.ts +++ b/src/config/modelProviders/minimax.ts @@ -3,24 +3,27 @@ import { ModelProviderCard } from '@/types/llm'; // ref https://www.minimaxi.com/document/guides/chat-model/pro/api const Minimax: ModelProviderCard = { chatModels: [ - { - description: '复杂场景,例如应用题计算、科学计算等场景', - displayName: 'abab6.5', - enabled: true, - id: 'abab6.5-chat', - tokens: 8192, - }, { description: '通用场景', displayName: 'abab6.5s', enabled: true, + functionCall: true, id: 'abab6.5s-chat', tokens: 245_760, }, + { + description: '复杂场景,例如应用题计算、科学计算等场景', + displayName: 'abab6.5', + enabled: true, + functionCall: true, + id: 'abab6.5-chat', + tokens: 8192, + }, { description: '更复杂的格式化文本生成', displayName: 'abab6', enabled: true, + functionCall: true, id: 'abab6-chat', tokens: 32_768, }, diff --git a/src/config/server/provider.ts b/src/config/server/provider.ts index d1d497dab4a4..28b43e91873d 100644 --- a/src/config/server/provider.ts +++ b/src/config/server/provider.ts @@ -211,7 +211,7 @@ export const getProviderConfig = () => { AWS_ACCESS_KEY_ID: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY || '', - ENABLE_OLLAMA: process.env.ENABLE_OLLAMA as unknown as boolean, + ENABLE_OLLAMA: Boolean(process.env.ENABLE_OLLAMA), OLLAMA_PROXY_URL: process.env.OLLAMA_PROXY_URL || '', OLLAMA_MODEL_LIST: process.env.OLLAMA_MODEL_LIST || process.env.OLLAMA_CUSTOM_MODELS, }; diff --git a/src/libs/agent-runtime/minimax/index.ts b/src/libs/agent-runtime/minimax/index.ts index 9c850482e490..85c372069a17 100644 --- a/src/libs/agent-runtime/minimax/index.ts +++ b/src/libs/agent-runtime/minimax/index.ts @@ -1,9 +1,6 @@ -import { StreamingTextResponse } from 'ai'; import { isEmpty } from 'lodash-es'; import OpenAI from 'openai'; -import { debugStream } from '@/libs/agent-runtime/utils/debugStream'; - import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType } from '../error'; import { @@ -13,6 +10,9 @@ import { ModelProvider, } from '../types'; import { AgentRuntimeError } from '../utils/createError'; +import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { MinimaxStream } from '../utils/streams'; interface MinimaxBaseResponse { base_resp?: { @@ -69,18 +69,8 @@ export class LobeMinimaxAI implements LobeRuntimeAI { this.apiKey = apiKey; } - async chat( - payload: ChatStreamPayload, - options?: ChatCompetitionOptions, - ): Promise { + 
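+  // POSTs to the Minimax chat completion endpoint, fails fast if the first SSE
+  // frame carries an error, then adapts the body stream to the shared protocol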
async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions): Promise<Response> {
     try {
-      let streamController: ReadableStreamDefaultController | undefined;
-      const readableStream = new ReadableStream({
-        start(controller) {
-          streamController = controller;
-        },
-      });
-
       const response = await fetch('https://api.minimax.chat/v1/text/chatcompletion_v2', {
         body: JSON.stringify(this.buildCompletionsParams(payload)),
         headers: {
@@ -107,12 +97,10 @@ export class LobeMinimaxAI implements LobeRuntimeAI {
         debugStream(debug).catch(console.error);
       }

-      this.parseResponse(prod.getReader(), streamController);
-
       // wait for the first response, and throw if minimax returns an error
       await this.parseFirstResponse(prod2.getReader());

-      return new StreamingTextResponse(readableStream, { headers: options?.headers });
+      return StreamingResponse(MinimaxStream(prod), { headers: options?.headers });
     } catch (error) {
       console.log('error', error);
       const err = error as Error | ChatCompletionErrorPayload;
@@ -154,30 +142,19 @@
       max_tokens: this.getMaxTokens(payload.model),
       stream: true,
       temperature: temperature === 0 ? undefined : temperature,
+
+      tools: params.tools?.map((tool) => ({
+        function: {
+          description: tool.function.description,
+          name: tool.function.name,
+          parameters: JSON.stringify(tool.function.parameters),
+        },
+        type: 'function',
+      })),
       top_p: top_p === 0 ? undefined : top_p,
     };
   }

-  private async parseResponse(
-    reader: ReadableStreamDefaultReader,
-    streamController: ReadableStreamDefaultController | undefined,
-  ) {
-    const encoder = new TextEncoder();
-    const decoder = new TextDecoder();
-    let done = false;
-
-    while (!done) {
-      const { value, done: doneReading } = await reader.read();
-      done = doneReading;
-      const chunkValue = decoder.decode(value, { stream: true });
-      const data = parseMinimaxResponse(chunkValue);
-      const text = data?.choices?.at(0)?.delta?.content || undefined;
-      streamController?.enqueue(encoder.encode(text));
-    }
-
-    streamController?.close();
-  }
-
   private async parseFirstResponse(reader: ReadableStreamDefaultReader) {
     const decoder = new TextDecoder();

diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts
index 0a8bed033dba..1e2d3f465d57 100644
--- a/src/libs/agent-runtime/utils/streams/index.ts
+++ b/src/libs/agent-runtime/utils/streams/index.ts
@@ -1,2 +1,3 @@
 export * from './openai';
+export * from './minimax';
 export * from './protocol';
diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts
new file mode 100644
index 000000000000..cdab8bc26f32
--- /dev/null
+++ b/src/libs/agent-runtime/utils/streams/minimax.ts
@@ -0,0 +1,39 @@
+import { createCallbacksTransformer } from 'ai';
+import OpenAI from 'openai';
+
+import { ChatStreamCallbacks } from '../../types';
+import { transformOpenAIStream } from './protocol';
+
+const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => {
+  const decoder = new TextDecoder();
+
+  let chunkValue = decoder.decode(unit8Array, { stream: true });
+
+  // chunkValue example:
+  // data: {"id":"028a65377137d57aaceeffddf48ae99f","choices":[{"finish_reason":"tool_calls","index":0,"delta":{"role":"assistant","tool_calls":[{"id":"call_function_7371372822","type":"function","function":{"name":"realtime-weather____fetchCurrentWeather","arguments":"{\"city\": [\"杭州\", \"北京\"]}"}}]}}],"created":155511,"model":"abab6.5s-chat","object":"chat.completion.chunk"}
+
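+  // note: this helper assumes each incoming buffer carries exactly one complete
+  // `data:` frame; a frame split across network chunks would fail the JSON.parse below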
+ // so we need to remove `data:` prefix and then parse it as JSON + if (chunkValue.startsWith('data:')) { + chunkValue = chunkValue.slice(5).trim(); + } + + return JSON.parse(chunkValue); +}; + +export const MinimaxStream = (stream: ReadableStream, callbacks?: ChatStreamCallbacks) => { + return stream + .pipeThrough( + new TransformStream({ + transform: (buffer, controller) => { + const chunk = unit8ArrayToJSONChunk(buffer); + + const { type, id, data } = transformOpenAIStream(chunk); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 803e2b01578b..8b621d2bbae1 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -1,15 +1,22 @@ -import { createCallbacksTransformer } from 'ai'; +import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './protocol'; +const chatStreamable = async function* (stream: AsyncIterable) { + for await (const response of stream) { + yield response; + } +}; + export const OpenAIStream = ( stream: Stream | ReadableStream, callbacks?: ChatStreamCallbacks, ) => { - const readableStream = stream instanceof ReadableStream ? stream : stream.toReadableStream(); + const readableStream = + stream instanceof ReadableStream ? stream : readableFromAsyncIterable(chatStreamable(stream)); return readableStream .pipeThrough( diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index d2a5b873147f..658c36b71e1b 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,41 +1,52 @@ import OpenAI from 'openai'; -interface StreamProtocolChunk { +export interface StreamProtocolChunk { data: any; id: string; type: 'text' | 'tool_calls' | 'data' | 'stop'; } -export const transformOpenAIStream = (chunk: Uint8Array): StreamProtocolChunk => { - const decoder = new TextDecoder(); - - const chunkValue = decoder.decode(chunk, { stream: true }); - const jsonValue: OpenAI.ChatCompletionChunk = JSON.parse(chunkValue); - +export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices - const item = jsonValue.choices[0]; + const item = chunk.choices[0]; - if (typeof item.delta.content === 'string') { - return { data: item.delta.content, id: jsonValue.id, type: 'text' }; + if (typeof item.delta?.content === 'string') { + return { data: item.delta.content, id: chunk.id, type: 'text' }; } - if (item.delta.tool_calls) { - return { data: item.delta.tool_calls, id: jsonValue.id, type: 'tool_calls' }; - } + if (item.delta?.tool_calls) { + return { + data: item.delta.tool_calls.map((value, index) => ({ + ...value, - if (item.delta.content === null) { - return { data: item.delta, id: jsonValue.id, type: 'data' }; + // mistral's tool calling don't have index and function field, it's data like: + // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and 
wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] + + // minimax's tool calling don't have index field, it's data like: + // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] + + // so we need to add these default values + index: typeof value.index !== 'undefined' ? value.index : index, + type: value.type || 'function', + })), + id: chunk.id, + type: 'tool_calls', + }; } // 给定结束原因 if (item.finish_reason) { - return { data: item.finish_reason, id: jsonValue.id, type: 'stop' }; + return { data: item.finish_reason, id: chunk.id, type: 'stop' }; + } + + if (item.delta.content === null) { + return { data: item.delta, id: chunk.id, type: 'data' }; } // 其余情况下,返回 delta 和 index return { - data: { delta: item.delta, id: jsonValue.id, index: item.index }, - id: jsonValue.id, + data: { delta: item.delta, id: chunk.id, index: item.index }, + id: chunk.id, type: 'data', }; }; From 1292e802b360cfdb9750910ac65899bb6013b2a6 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Thu, 9 May 2024 02:12:39 +0800 Subject: [PATCH 08/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20mistral=20t?= =?UTF-8?q?ool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/mistral.ts | 1 + src/libs/agent-runtime/mistral/index.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/src/config/modelProviders/mistral.ts b/src/config/modelProviders/mistral.ts index e2f870b8af8e..959362fd0686 100644 --- a/src/config/modelProviders/mistral.ts +++ b/src/config/modelProviders/mistral.ts @@ -18,6 +18,7 @@ const Mistral: ModelProviderCard = { { displayName: 'Mixtral 8x22B', enabled: true, + functionCall: true, id: 'open-mixtral-8x22b', tokens: 65_536, }, diff --git a/src/libs/agent-runtime/mistral/index.ts b/src/libs/agent-runtime/mistral/index.ts index cd23f01f52f5..5bd1980a228a 100644 --- a/src/libs/agent-runtime/mistral/index.ts +++ b/src/libs/agent-runtime/mistral/index.ts @@ -11,6 +11,7 @@ export const LobeMistralAI = LobeOpenAICompatibleFactory({ model: payload.model, stream: true, temperature: payload.temperature, + tools: payload.tools, top_p: payload.top_p, }), }, From 7e687231d3803b3eed85617eeb17fc02ffc4b4bf Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Thu, 9 May 2024 05:54:31 +0000 Subject: [PATCH 09/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20google=20to?= =?UTF-8?q?ol=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/modelProviders/google.ts | 3 + src/libs/agent-runtime/google/index.ts | 109 +++++++++++++++-- src/libs/agent-runtime/utils/debugStream.ts | 48 ++++++-- .../agent-runtime/utils/streams/google-ai.ts | 110 ++++++++++++++++++ src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/minimax.ts | 2 +- .../agent-runtime/utils/streams/openai.ts | 50 +++++++- .../agent-runtime/utils/streams/protocol.ts | 54 +++------ 8 files changed, 316 insertions(+), 61 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/google-ai.ts diff --git a/src/config/modelProviders/google.ts 
b/src/config/modelProviders/google.ts index 3f5586f326e6..27e0566cd4d2 100644 --- a/src/config/modelProviders/google.ts +++ b/src/config/modelProviders/google.ts @@ -22,6 +22,7 @@ const Google: ModelProviderCard = { description: 'The best model for scaling across a wide range of tasks', displayName: 'Gemini 1.0 Pro', enabled: true, + functionCall: true, id: 'gemini-pro', maxOutput: 2048, tokens: 30_720 + 2048, @@ -47,6 +48,7 @@ const Google: ModelProviderCard = { description: 'The best model for scaling across a wide range of tasks. This is a stable model that supports tuning.', displayName: 'Gemini 1.0 Pro 001 (Tuning)', + functionCall: true, id: 'gemini-1.0-pro-001', maxOutput: 2048, tokens: 30_720 + 2048, @@ -71,6 +73,7 @@ const Google: ModelProviderCard = { description: 'Mid-size multimodal model that supports up to 1 million tokens', displayName: 'Gemini 1.5 Pro', enabled: true, + functionCall: true, id: 'gemini-1.5-pro-latest', maxOutput: 8192, tokens: 1_048_576 + 8192, diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index c3f83ccc0e38..e9984be47799 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -1,10 +1,20 @@ -import { Content, GoogleGenerativeAI, Part } from '@google/generative-ai'; -import { GoogleGenerativeAIStream, StreamingTextResponse } from 'ai'; +import { + Content, + FunctionDeclaration, + FunctionDeclarationSchemaProperty, + FunctionDeclarationSchemaType, + Tool as GoogleFunctionCallTool, + GoogleGenerativeAI, + Part, +} from '@google/generative-ai'; +import { JSONSchema7 } from 'json-schema'; +import { transform } from 'lodash-es'; import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType, ILobeAgentRuntimeErrorType } from '../error'; import { ChatCompetitionOptions, + ChatCompletionTool, ChatStreamPayload, OpenAIChatMessage, UserMessageContentPart, @@ -12,6 +22,8 @@ import { import { ModelProvider } from '../types/type'; import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { GoogleGenerativeAIStream, googleGenAIResultToStream } from '../utils/streams'; import { parseDataUri } from '../utils/uriParser'; enum HarmCategory { @@ -42,7 +54,7 @@ export class LobeGoogleAI implements LobeRuntimeAI { const contents = this.buildGoogleMessages(payload.messages, model); - const geminiStream = await this.client + const geminiStreamResult = await this.client .getGenerativeModel( { generationConfig: { @@ -74,19 +86,20 @@ export class LobeGoogleAI implements LobeRuntimeAI { }, { apiVersion: 'v1beta', baseUrl: this.baseURL }, ) - .generateContentStream({ contents }); - - // Convert the response into a friendly text-stream - const stream = GoogleGenerativeAIStream(geminiStream, options?.callback); + .generateContentStream({ contents, tools: this.buildGoogleTools(payload.tools) }); - const [debug, output] = stream.tee(); + const googleStream = googleGenAIResultToStream(geminiStreamResult); + const [prod, useForDebug] = googleStream.tee(); if (process.env.DEBUG_GOOGLE_CHAT_COMPLETION === '1') { - debugStream(debug).catch(console.error); + debugStream(useForDebug).catch(); } + // Convert the response into a friendly text-stream + const stream = GoogleGenerativeAIStream(prod, options?.callback); + // Respond with the stream - return new StreamingTextResponse(output, { headers: options?.headers }); + return StreamingResponse(stream, { headers: options?.headers 
});
     } catch (e) {
       const err = e as Error;
@@ -226,6 +239,82 @@ export class LobeGoogleAI implements LobeRuntimeAI {
       return defaultError;
     }
   }
+
+  private buildGoogleTools(
+    tools: ChatCompletionTool[] | undefined,
+  ): GoogleFunctionCallTool[] | undefined {
+    if (!tools || tools.length === 0) return;
+
+    return [
+      {
+        functionDeclarations: tools.map((tool) => {
+          const t = this.convertToolToGoogleTool(tool);
+          console.log('output Schema', t);
+          return t;
+        }),
+      },
+    ];
+  }
+
+  private convertToolToGoogleTool = (tool: ChatCompletionTool): FunctionDeclaration => {
+    const functionDeclaration = tool.function;
+    const parameters = functionDeclaration.parameters;
+
+    console.log('input Schema', JSON.stringify(parameters, null, 2));
+
+    return {
+      description: functionDeclaration.description,
+      name: functionDeclaration.name,
+      parameters: {
+        description: parameters?.description,
+        properties: transform(parameters?.properties, (result, value, key: string) => {
+          result[key] = this.convertSchemaObject(value as JSONSchema7);
+        }),
+        required: parameters?.required,
+        type: FunctionDeclarationSchemaType.OBJECT,
+      },
+    };
+  };
+
+  private convertSchemaObject(schema: JSONSchema7): FunctionDeclarationSchemaProperty {
+    console.log('input:', schema);
+
+    switch (schema.type) {
+      default:
+      case 'object': {
+        return {
+          ...schema,
+          properties: Object.fromEntries(
+            Object.entries(schema.properties || {}).map(([key, value]) => [
+              key,
+              this.convertSchemaObject(value as JSONSchema7),
+            ]),
+          ),
+          type: FunctionDeclarationSchemaType.OBJECT,
+        } as any;
+      }
+
+      case 'array': {
+        return {
+          ...schema,
+          items: this.convertSchemaObject(schema.items as JSONSchema7),
+          type: FunctionDeclarationSchemaType.ARRAY,
+        } as any;
+      }
+
+      case 'string': {
+        return { ...schema, type: FunctionDeclarationSchemaType.STRING } as any;
+      }
+
+      case 'number': {
+        return { ...schema, type: FunctionDeclarationSchemaType.NUMBER } as any;
+      }
+
+      case 'boolean': {
+        return { ...schema, type: FunctionDeclarationSchemaType.BOOLEAN } as any;
+      }
+    }
+  }
 }

 export default LobeGoogleAI;
diff --git a/src/libs/agent-runtime/utils/debugStream.ts b/src/libs/agent-runtime/utils/debugStream.ts
index 75dffe462ff1..4fbed582fa12 100644
--- a/src/libs/agent-runtime/utils/debugStream.ts
+++ b/src/libs/agent-runtime/utils/debugStream.ts
@@ -1,18 +1,48 @@
+// no need to pull in a date package just to get the current time: this module is only a debug utility
+const getTime = () => {
+  const date = new Date();
+  return `${date.getFullYear()}-${date.getMonth() + 1}-${date.getDate()} ${date.getHours()}:${date.getMinutes()}:${date.getSeconds()}.${date.getMilliseconds()}`;
+};
+
 export const debugStream = async (stream: ReadableStream) => {
-  let done = false;
+  let finished = false;
   let chunk = 0;
+  let chunkValue: any;
   const decoder = new TextDecoder();
   const reader = stream.getReader();
-  while (!done) {
-    const { value, done: _done } = await reader.read();
-    const chunkValue = decoder.decode(value, { stream: true });
-    if (!_done) {
-      console.log(`[chunk ${chunk}]`);
+
+  console.log(`[stream start] ${getTime()}`);
+
+  while (!finished) {
+    try {
+      const { value, done } = await reader.read();
+
+      if (done) {
+        console.log(`[stream finished] total chunks: ${chunk}\n`);
+        finished = true;
+        break;
+      }
+
+      chunkValue = value;
+
+      // if the value is an ArrayBuffer, we need to decode it
+      if ('byteLength' in value) {
+        chunkValue = decoder.decode(value, { stream: true });
+      } else if (typeof value !== 'string') {
+        chunkValue = JSON.stringify(value);
+      }
+
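+      // with a DEBUG_*_CHAT_COMPLETION flag enabled, this prints lines like:
+      //   [chunk 3] 2024-5-9 12:30:1.123
+      //   {"id":"...","choices":[...]}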
console.log(`[chunk ${chunk}] ${getTime()}`); console.log(chunkValue); - } + console.log(`\n`); - done = _done; - chunk++; + finished = done; + chunk++; + } catch (e) { + finished = true; + console.error('[debugStream error]', e); + console.error('[error chunk value:]', chunkValue); + } } }; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts new file mode 100644 index 000000000000..cf41e51d1747 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -0,0 +1,110 @@ +import { + EnhancedGenerateContentResponse, + GenerateContentStreamResult, +} from '@google/generative-ai'; +import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; + +import { nanoid } from '@/utils/uuid'; + +import { ChatStreamCallbacks } from '../../types'; +import { + StreamProtocolChunk, + StreamToolCallChunk, + chatStreamable, + generateToolCallId, +} from './protocol'; + +const transformGoogleGenerativeAIStream = ( + chunk: EnhancedGenerateContentResponse, +): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + const functionCalls = chunk.functionCalls(); + + if (functionCalls) { + return { + data: functionCalls.map( + (value, index): StreamToolCallChunk => ({ + function: { + arguments: JSON.stringify(value.args), + name: value.name, + }, + id: generateToolCallId(index, value.name), + index: index, + type: 'function', + }), + ), + id: nanoid(), + type: 'tool_calls', + }; + } + const text = chunk.text(); + + return { + data: text, + id: nanoid(), + type: 'text', + }; + // if (typeof item.delta?.content === 'string') { + // return { data: item.delta.content, id: chunk.id, type: 'text' }; + // } + // + // if (item.delta?.tool_calls) { + // return { + // data: item.delta.tool_calls.map((value, index) => ({ + // ...value, + // + // // mistral's tool calling don't have index and function field, it's data like: + // // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] + // + // // minimax's tool calling don't have index field, it's data like: + // // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] + // + // // so we need to add these default values + // index: typeof value.index !== 'undefined' ? 
value.index : index, + // type: value.type || 'function', + // })), + // id: chunk.id, + // type: 'tool_calls', + // }; + // } + // + // // 给定结束原因 + // if (item.finish_reason) { + // return { data: item.finish_reason, id: chunk.id, type: 'stop' }; + // } + // + // if (item.delta.content === null) { + // return { data: item.delta, id: chunk.id, type: 'data' }; + // } + // + // // 其余情况下,返回 delta 和 index + // return { + // data: { delta: item.delta, id: chunk.id, index: item.index }, + // id: chunk.id, + // type: 'data', + // }; +}; + +// only use for debug +export const googleGenAIResultToStream = (stream: GenerateContentStreamResult) => { + // make the response to the streamable format + return readableFromAsyncIterable(chatStreamable(stream.stream)); +}; + +export const GoogleGenerativeAIStream = ( + rawStream: ReadableStream, + callbacks?: ChatStreamCallbacks, +) => + rawStream + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformGoogleGenerativeAIStream(chunk); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 1e2d3f465d57..47bec33c4374 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,3 +1,4 @@ +export * from './google-ai'; export * from './openai'; export * from './minimax'; export * from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts index cdab8bc26f32..cb53534d6468 100644 --- a/src/libs/agent-runtime/utils/streams/minimax.ts +++ b/src/libs/agent-runtime/utils/streams/minimax.ts @@ -2,7 +2,7 @@ import { createCallbacksTransformer } from 'ai'; import OpenAI from 'openai'; import { ChatStreamCallbacks } from '../../types'; -import { transformOpenAIStream } from './protocol'; +import { transformOpenAIStream } from './openai'; const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => { const decoder = new TextDecoder(); diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 8b621d2bbae1..4b6bbb401170 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -3,7 +3,55 @@ import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; import { ChatStreamCallbacks } from '../../types'; -import { transformOpenAIStream } from './protocol'; +import { StreamProtocolChunk, StreamToolCallChunk, generateToolCallId } from './protocol'; + +export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + const item = chunk.choices[0]; + + if (typeof item.delta?.content === 'string') { + return { data: item.delta.content, id: chunk.id, type: 'text' }; + } + + if (item.delta?.tool_calls) { + return { + data: item.delta.tool_calls.map( + (value, index): StreamToolCallChunk => ({ + function: value.function, + id: value.id || generateToolCallId(index, value.function?.name), + + // mistral's tool calling don't have index and function field, it's data like: + // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": 
[\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] + + // minimax's tool calling don't have index field, it's data like: + // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] + + // so we need to add these default values + index: typeof value.index !== 'undefined' ? value.index : index, + type: value.type || 'function', + }), + ), + id: chunk.id, + type: 'tool_calls', + }; + } + + // 给定结束原因 + if (item.finish_reason) { + return { data: item.finish_reason, id: chunk.id, type: 'stop' }; + } + + if (item.delta.content === null) { + return { data: item.delta, id: chunk.id, type: 'data' }; + } + + // 其余情况下,返回 delta 和 index + return { + data: { delta: item.delta, id: chunk.id, index: item.index }, + id: chunk.id, + type: 'data', + }; +}; const chatStreamable = async function* (stream: AsyncIterable) { for await (const response of stream) { diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 658c36b71e1b..8c5b21cd0c96 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -6,47 +6,21 @@ export interface StreamProtocolChunk { type: 'text' | 'tool_calls' | 'data' | 'stop'; } -export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { - // maybe need another structure to add support for multiple choices - const item = chunk.choices[0]; - - if (typeof item.delta?.content === 'string') { - return { data: item.delta.content, id: chunk.id, type: 'text' }; - } - - if (item.delta?.tool_calls) { - return { - data: item.delta.tool_calls.map((value, index) => ({ - ...value, - - // mistral's tool calling don't have index and function field, it's data like: - // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] - - // minimax's tool calling don't have index field, it's data like: - // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] - - // so we need to add these default values - index: typeof value.index !== 'undefined' ? 
value.index : index, - type: value.type || 'function', - })), - id: chunk.id, - type: 'tool_calls', - }; - } +export interface StreamToolCallChunk { + function?: { + arguments?: string; + name?: string | null; + }; + id: string; + index: number; + type: 'function' | string; +} - // 给定结束原因 - if (item.finish_reason) { - return { data: item.finish_reason, id: chunk.id, type: 'stop' }; - } +export const generateToolCallId = (index: number, functionName?: string) => + `${functionName || 'unknown_tool_call'}_${index}`; - if (item.delta.content === null) { - return { data: item.delta, id: chunk.id, type: 'data' }; +export const chatStreamable = async function* (stream: AsyncIterable) { + for await (const response of stream) { + yield response; } - - // 其余情况下,返回 delta 和 index - return { - data: { delta: item.delta, id: chunk.id, index: item.index }, - id: chunk.id, - type: 'data', - }; }; From ca4f30beee4434c922c5a37517575fdddba19280 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 00:03:26 +0800 Subject: [PATCH 10/24] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20anthropic?= =?UTF-8?q?=20tool=20calling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 2 +- src/config/modelProviders/anthropic.ts | 3 + src/libs/agent-runtime/anthropic/index.ts | 90 +++++++++++++--- src/libs/agent-runtime/types/chat.ts | 1 + .../agent-runtime/utils/anthropicHelpers.ts | 64 +++++++++-- .../agent-runtime/utils/streams/anthropic.ts | 102 ++++++++++++++++++ .../agent-runtime/utils/streams/google-ai.ts | 4 +- src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/openai.ts | 11 +- .../agent-runtime/utils/streams/protocol.ts | 11 +- 10 files changed, 259 insertions(+), 30 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/anthropic.ts diff --git a/package.json b/package.json index 3ddfca595c23..abe1a4e09871 100644 --- a/package.json +++ b/package.json @@ -82,7 +82,7 @@ }, "dependencies": { "@ant-design/icons": "^5.3.6", - "@anthropic-ai/sdk": "^0.18.0", + "@anthropic-ai/sdk": "^0.20.9", "@auth/core": "0.28.0", "@aws-sdk/client-bedrock-runtime": "^3.565.0", "@azure/openai": "^1.0.0-beta.12", diff --git a/src/config/modelProviders/anthropic.ts b/src/config/modelProviders/anthropic.ts index 93bcfb40f748..01907c7114f7 100644 --- a/src/config/modelProviders/anthropic.ts +++ b/src/config/modelProviders/anthropic.ts @@ -8,6 +8,7 @@ const Anthropic: ModelProviderCard = { 'Ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments', displayName: 'Claude 3 Sonnet', enabled: true, + functionCall: true, id: 'claude-3-sonnet-20240229', maxOutput: 4096, tokens: 200_000, @@ -18,6 +19,7 @@ const Anthropic: ModelProviderCard = { 'Most powerful model for highly complex tasks. Top-level performance, intelligence, fluency, and understanding', displayName: 'Claude 3 Opus', enabled: true, + functionCall: true, id: 'claude-3-opus-20240229', maxOutput: 4096, tokens: 200_000, @@ -28,6 +30,7 @@ const Anthropic: ModelProviderCard = { 'Fastest and most compact model for near-instant responsiveness. 
Quick and accurate targeted performance',
     displayName: 'Claude 3 Haiku',
     enabled: true,
+    functionCall: true,
     id: 'claude-3-haiku-20240307',
     maxOutput: 4096,
     tokens: 200_000,
diff --git a/src/libs/agent-runtime/anthropic/index.ts b/src/libs/agent-runtime/anthropic/index.ts
index 6467ae5ded8a..936ab9b2a9eb 100644
--- a/src/libs/agent-runtime/anthropic/index.ts
+++ b/src/libs/agent-runtime/anthropic/index.ts
@@ -1,7 +1,6 @@
 // sort-imports-ignore
 import '@anthropic-ai/sdk/shims/web';
 import Anthropic from '@anthropic-ai/sdk';
-import { AnthropicStream, StreamingTextResponse } from 'ai';
 import { ClientOptions } from 'openai';

 import { LobeRuntimeAI } from '../BaseAI';
@@ -11,6 +10,8 @@ import { AgentRuntimeError } from '../utils/createError';
 import { debugStream } from '../utils/debugStream';
 import { desensitizeUrl } from '../utils/desensitizeUrl';
 import { buildAnthropicMessages } from '../utils/anthropicHelpers';
+import { StreamingResponse } from '../utils/response';
+import { AnthropicStream } from '../utils/streams';

 const DEFAULT_BASE_URL = 'https://api.anthropic.com';

@@ -30,18 +31,40 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
     try {
       const anthropicPayload = this.buildAnthropicPayload(payload);

-      const response = await this.client.messages.create(
-        { ...anthropicPayload, stream: true },
+      // if there are no tools, we can use the normal streaming chat API
+      if (!anthropicPayload.tools || anthropicPayload.tools.length === 0) {
+        const response = await this.client.messages.create(
+          { ...anthropicPayload, stream: true },
+          {
+            signal: options?.signal,
+          },
+        );
+
+        const [prod, debug] = response.tee();
+
+        if (process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1') {
+          debugStream(debug.toReadableStream()).catch(console.error);
+        }
+
+        return StreamingResponse(AnthropicStream(prod, options?.callback), {
+          headers: options?.headers,
+        });
+      }
+
+      // otherwise we call the (non-streaming) tools beta API
+      const response = await this.client.beta.tools.messages.create(
+        { ...anthropicPayload, stream: false },
         { signal: options?.signal },
       );

-      const [prod, debug] = response.tee();
-
       if (process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1') {
-        debugStream(debug.toReadableStream()).catch(console.error);
+        console.log('\n[no stream response]\n');
+        console.log(JSON.stringify(response) + '\n');
       }

-      return new StreamingTextResponse(AnthropicStream(prod, options?.callback), {
+      const stream = this.transformResponseToStream(response);
+
+      return StreamingResponse(AnthropicStream(stream, options?.callback), {
         headers: options?.headers,
       });
     } catch (error) {
@@ -85,20 +108,59 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
   }

   private buildAnthropicPayload(payload: ChatStreamPayload) {
-    const { messages, model, max_tokens, temperature, top_p } = payload;
+    const { messages, model, max_tokens = 4096, temperature, top_p, tools } = payload;
     const system_message = messages.find((m) => m.role === 'system');
     const user_messages = messages.filter((m) => m.role !== 'system');

     return {
-      max_tokens: max_tokens || 4096,
+      max_tokens,
       messages: buildAnthropicMessages(user_messages),
-      model: model,
-      stream: true,
+      model,
       system: system_message?.content as string,
-      temperature: temperature,
-      top_p: top_p,
-    };
+      temperature,
+      // TODO: the Anthropic SDK doesn't expose a tools interface yet
+      // @ts-ignore
+      tools: tools?.map(
+        (tool): Anthropic.Beta.Tools.Tool => ({
+          description: tool.function.description,
+          input_schema: tool.function.parameters as Anthropic.Beta.Tools.Tool.InputSchema,
+          name: tool.function.name,
+
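+          // e.g. an OpenAI-format tool { type: 'function', function: { name, description, parameters } }
+          // becomes Anthropic's { name, description, input_schema } shape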
}), + ), + top_p, + } satisfies Anthropic.MessageCreateParams; } + + private transformResponseToStream = (response: Anthropic.Beta.Tools.ToolsBetaMessage) => { + return new ReadableStream({ + start(controller) { + response.content.forEach((content) => { + switch (content.type) { + case 'text': { + controller.enqueue({ + delta: { text: content.text, type: 'text_delta' }, + type: 'content_block_delta', + } as Anthropic.ContentBlockDeltaEvent); + break; + } + case 'tool_use': { + controller.enqueue({ + delta: { + tool_use: { id: content.id, input: content.input, name: content.name }, + type: 'tool_use', + }, + type: 'content_block_delta', + } as any); + } + } + }); + + controller.enqueue({ type: 'message_stop' } as Anthropic.MessageStopEvent); + + controller.close(); + }, + }); + }; } export default LobeAnthropicAI; diff --git a/src/libs/agent-runtime/types/chat.ts b/src/libs/agent-runtime/types/chat.ts index b7b5eb918d1d..93aebf994997 100644 --- a/src/libs/agent-runtime/types/chat.ts +++ b/src/libs/agent-runtime/types/chat.ts @@ -32,6 +32,7 @@ export interface OpenAIChatMessage { * @description 消息发送者的角色 */ role: LLMRoleType; + tool_call_id?: string; tool_calls?: MessageToolCall[]; } diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.ts b/src/libs/agent-runtime/utils/anthropicHelpers.ts index 0528f296a38f..02779fddd05d 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.ts @@ -30,13 +30,63 @@ export const buildAnthropicMessage = ( message: OpenAIChatMessage, ): Anthropic.Messages.MessageParam => { const content = message.content as string | UserMessageContentPart[]; - return { - content: typeof content === 'string' ? content : content.map((c) => buildAnthropicBlock(c)), - role: - message.role === 'tool' || message.role === 'function' || message.role === 'system' - ? 'assistant' - : message.role, - }; + + switch (message.role) { + case 'system': { + return { content: content as string, role: 'user' }; + } + + case 'user': { + return { + content: typeof content === 'string' ? 
content : content.map((c) => buildAnthropicBlock(c)),
+        role: 'user',
+      };
+    }
+
+    case 'tool': {
+      // refs: https://docs.anthropic.com/claude/docs/tool-use#tool-use-and-tool-result-content-blocks
+      return {
+        content: [
+          {
+            content: message.content,
+            tool_use_id: message.tool_call_id,
+            type: 'tool_result',
+          } as any,
+        ],
+        role: 'user',
+      };
+    }
+
+    case 'assistant': {
+      // if there are tool_calls, we need to convert them to tool_use content blocks
+      // refs: https://docs.anthropic.com/claude/docs/tool-use#tool-use-and-tool-result-content-blocks
+      if (message.tool_calls) {
+        return {
+          content: [
+            // avoid an empty text content block
+            !!message.content && {
+              text: message.content as string,
+              type: 'text',
+            },
+            ...(message.tool_calls.map((tool) => ({
+              id: tool.id,
+              input: JSON.parse(tool.function.arguments),
+              name: tool.function.name,
+              type: 'tool_use',
+            })) as any),
+          ].filter(Boolean),
+          role: 'assistant',
+        };
+      }
+
+      // or it's a plain assistant message
+      return { content: content as string, role: 'assistant' };
+    }
+
+    case 'function': {
+      return { content: content as string, role: 'assistant' };
+    }
+  }
 };

 export const buildAnthropicMessages = (
diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts
new file mode 100644
index 000000000000..f35561b92f98
--- /dev/null
+++ b/src/libs/agent-runtime/utils/streams/anthropic.ts
@@ -0,0 +1,102 @@
+import Anthropic from '@anthropic-ai/sdk';
+import type { Stream } from '@anthropic-ai/sdk/streaming';
+import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai';
+
+import { ChatStreamCallbacks } from '../../types';
+import {
+  StreamProtocolChunk,
+  StreamProtocolToolCallChunk,
+  StreamToolCallChunkData,
+} from './protocol';
+
+interface StreamStack {
+  id: string;
+}
+
+export const transformAnthropicStream = (
+  chunk: Anthropic.MessageStreamEvent,
+  stack: StreamStack,
+): StreamProtocolChunk => {
+  // maybe need another structure to add support for multiple choices
+  switch (chunk.type) {
+    case 'message_start': {
+      stack.id = chunk.message.id;
+      return { data: chunk.message, id: chunk.message.id, type: 'data' };
+    }
+
+    // case 'content_block_start': {
+    //   return { data: chunk.content_block.text, id: stack.id, type: 'data' };
+    // }
+
+    case 'content_block_delta': {
+      switch (chunk.delta.type as string) {
+        default:
+        case 'text_delta': {
+          return { data: chunk.delta.text, id: stack.id, type: 'text' };
+        }
+
+        // TODO: Anthropic doesn't support streaming tool calling yet, so we add
+        // this custom `tool_use` delta type to stream it ourselves; it may need
+        // updating once the feature ships upstream
+        case 'tool_use': {
+          const delta = (chunk.delta as any).tool_use as Anthropic.Beta.Tools.ToolUseBlock;
+
+          const toolCall: StreamToolCallChunkData = {
+            function: { arguments: JSON.stringify(delta.input), name: delta.name },
+            id: delta.id,
+            index: 0,
+            type: 'function',
+          };
+
+          return {
+            data: [toolCall],
+            id: stack.id,
+            type: 'tool_calls',
+          } as StreamProtocolToolCallChunk;
+        }
+      }
+    }
+
+    case 'message_delta': {
+      return { data: chunk.delta.stop_reason, id: stack.id, type: 'stop' };
+    }
+
+    case 'message_stop': {
+      return { data: 'message_stop', id: stack.id, type: 'stop' };
+    }
+
+    default: {
+      return { data: chunk, id: stack.id, type: 'data' };
+    }
+  }
+};
+
+const chatStreamable = async function* (stream: AsyncIterable<Anthropic.MessageStreamEvent>) {
+  for await (const response of stream) {
+    yield response;
+  }
+};
+
+export const AnthropicStream =
( + stream: Stream | ReadableStream, + callbacks?: ChatStreamCallbacks, +) => { + const streamStack: StreamStack = { id: '' }; + + const readableStream = + stream instanceof ReadableStream ? stream : readableFromAsyncIterable(chatStreamable(stream)); + + return readableStream + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformAnthropicStream(chunk, streamStack); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts index cf41e51d1747..46e36a7bdcf6 100644 --- a/src/libs/agent-runtime/utils/streams/google-ai.ts +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -9,7 +9,7 @@ import { nanoid } from '@/utils/uuid'; import { ChatStreamCallbacks } from '../../types'; import { StreamProtocolChunk, - StreamToolCallChunk, + StreamToolCallChunkData, chatStreamable, generateToolCallId, } from './protocol'; @@ -23,7 +23,7 @@ const transformGoogleGenerativeAIStream = ( if (functionCalls) { return { data: functionCalls.map( - (value, index): StreamToolCallChunk => ({ + (value, index): StreamToolCallChunkData => ({ function: { arguments: JSON.stringify(value.args), name: value.name, diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 47bec33c4374..c40d3e21a8e0 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,3 +1,4 @@ +export * from './anthropic'; export * from './google-ai'; export * from './openai'; export * from './minimax'; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 4b6bbb401170..3695c6505d7a 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -3,7 +3,12 @@ import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; import { ChatStreamCallbacks } from '../../types'; -import { StreamProtocolChunk, StreamToolCallChunk, generateToolCallId } from './protocol'; +import { + StreamProtocolChunk, + StreamProtocolToolCallChunk, + StreamToolCallChunkData, + generateToolCallId, +} from './protocol'; export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices @@ -16,7 +21,7 @@ export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): Stream if (item.delta?.tool_calls) { return { data: item.delta.tool_calls.map( - (value, index): StreamToolCallChunk => ({ + (value, index): StreamToolCallChunkData => ({ function: value.function, id: value.id || generateToolCallId(index, value.function?.name), @@ -33,7 +38,7 @@ export const transformOpenAIStream = (chunk: OpenAI.ChatCompletionChunk): Stream ), id: chunk.id, type: 'tool_calls', - }; + } as StreamProtocolToolCallChunk; } // 给定结束原因 diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 8c5b21cd0c96..b4f74d64e932 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,12 +1,10 @@ -import OpenAI from 'openai'; - export interface StreamProtocolChunk { data: any; id: string; type: 'text' | 'tool_calls' | 
'data' | 'stop'; } -export interface StreamToolCallChunk { +export interface StreamToolCallChunkData { function?: { arguments?: string; name?: string | null; @@ -16,6 +14,13 @@ export interface StreamToolCallChunk { type: 'function' | string; } +export interface StreamProtocolToolCallChunk { + data: StreamToolCallChunkData[]; + id: string; + index: number; + type: 'tool_calls'; +} + export const generateToolCallId = (index: number, functionName?: string) => `${functionName || 'unknown_tool_call'}_${index}`; From bd0b3f7e038315fb2454576f5f7a2fef27cfc2cf Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Fri, 10 May 2024 07:47:33 +0000 Subject: [PATCH 11/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20ollama=20stream?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/ollama/index.ts | 6 +-- src/libs/agent-runtime/ollama/stream.ts | 31 ------------ .../agent-runtime/utils/streams/anthropic.ts | 5 +- src/libs/agent-runtime/utils/streams/index.ts | 1 + .../agent-runtime/utils/streams/ollama.ts | 48 +++++++++++++++++++ .../agent-runtime/utils/streams/protocol.ts | 6 ++- 6 files changed, 58 insertions(+), 39 deletions(-) delete mode 100644 src/libs/agent-runtime/ollama/stream.ts create mode 100644 src/libs/agent-runtime/utils/streams/ollama.ts diff --git a/src/libs/agent-runtime/ollama/index.ts b/src/libs/agent-runtime/ollama/index.ts index b36ec48f9ba3..80a47ad48423 100644 --- a/src/libs/agent-runtime/ollama/index.ts +++ b/src/libs/agent-runtime/ollama/index.ts @@ -1,15 +1,15 @@ -import { StreamingTextResponse } from 'ai'; import { Ollama } from 'ollama/browser'; import { ClientOptions } from 'openai'; import { OpenAIChatMessage } from '@/libs/agent-runtime'; -import { OllamaStream } from '@/libs/agent-runtime/ollama/stream'; import { ChatModelCard } from '@/types/llm'; import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType } from '../error'; import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types'; import { AgentRuntimeError } from '../utils/createError'; +import { StreamingResponse } from '../utils/response'; +import { OllamaStream } from '../utils/streams'; import { parseDataUri } from '../utils/uriParser'; import { OllamaMessage } from './type'; @@ -51,7 +51,7 @@ export class LobeOllamaAI implements LobeRuntimeAI { stream: true, }); - return new StreamingTextResponse(OllamaStream(response, options?.callback), { + return StreamingResponse(OllamaStream(response, options?.callback), { headers: options?.headers, }); } catch (error) { diff --git a/src/libs/agent-runtime/ollama/stream.ts b/src/libs/agent-runtime/ollama/stream.ts deleted file mode 100644 index 02a019f04885..000000000000 --- a/src/libs/agent-runtime/ollama/stream.ts +++ /dev/null @@ -1,31 +0,0 @@ -// copy from https://github.com/vercel/ai/discussions/539#discussioncomment-8193721 -// and I have remove the unnecessary code -import { - type AIStreamCallbacksAndOptions, - createCallbacksTransformer, - createStreamDataTransformer, - readableFromAsyncIterable, -} from 'ai'; -import { ChatResponse } from 'ollama/browser'; - -// A modified version of the streamable function specifically for chat messages -const chatStreamable = async function* (stream: AsyncIterable) { - for await (const response of stream) { - if (response.message) { - yield response.message; - } - if (response.done) { - // Additional final response data can be handled here if necessary - return; - } - } -}; - -export const OllamaStream = ( - 
res: AsyncIterable, - cb?: AIStreamCallbacksAndOptions, -): ReadableStream => { - return readableFromAsyncIterable(chatStreamable(res)) - .pipeThrough(createCallbacksTransformer(cb) as any) - .pipeThrough(createStreamDataTransformer(cb?.experimental_streamData)); -}; diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index f35561b92f98..ff190340d12b 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -6,13 +6,10 @@ import { ChatStreamCallbacks } from '../../types'; import { StreamProtocolChunk, StreamProtocolToolCallChunk, + StreamStack, StreamToolCallChunkData, } from './protocol'; -interface StreamStack { - id: string; -} - export const transformAnthropicStream = ( chunk: Anthropic.MessageStreamEvent, stack: StreamStack, diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index c40d3e21a8e0..0a9277369426 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,5 +1,6 @@ export * from './anthropic'; export * from './google-ai'; +export * from './ollama'; export * from './openai'; export * from './minimax'; export * from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/ollama.ts b/src/libs/agent-runtime/utils/streams/ollama.ts new file mode 100644 index 000000000000..c0f5b4cc7c60 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/ollama.ts @@ -0,0 +1,48 @@ +// copy from https://github.com/vercel/ai/discussions/539#discussioncomment-8193721 +// and I have remove the unnecessary code +import { + type AIStreamCallbacksAndOptions, + createCallbacksTransformer, + readableFromAsyncIterable, +} from 'ai'; +import { ChatResponse } from 'ollama/browser'; + +import { nanoid } from '@/utils/uuid'; + +import { StreamProtocolChunk, StreamStack } from './protocol'; + +const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + if (chunk.done) { + return { data: 'finished', id: stack.id, type: 'stop' }; + } + + return { data: chunk.message.content, id: stack.id, type: 'text' }; +}; + +const chatStreamable = async function* (stream: AsyncIterable) { + for await (const response of stream) { + yield response; + } +}; + +export const OllamaStream = ( + res: AsyncIterable, + cb?: AIStreamCallbacksAndOptions, +): ReadableStream => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; + + return readableFromAsyncIterable(chatStreamable(res)) + .pipeThrough( + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformOllamaStream(chunk, streamStack); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }), + ) + .pipeThrough(createCallbacksTransformer(cb) as any); +}; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index b4f74d64e932..2746d4f736b7 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,6 +1,10 @@ +export interface StreamStack { + id: string; +} + export interface StreamProtocolChunk { data: any; - id: string; + id?: string; type: 'text' | 'tool_calls' | 'data' | 'stop'; } From 24b6abbbd10cbcc38f51d0af2b8f0cc5b742507d Mon Sep 17 00:00:00 2001 From: 
arvinxx Date: Fri, 10 May 2024 21:26:08 +0800 Subject: [PATCH 12/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20the=20protocol=20stream=20transformer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../agent-runtime/utils/streams/anthropic.ts | 13 +--- .../agent-runtime/utils/streams/google-ai.ts | 63 +++---------------- .../agent-runtime/utils/streams/minimax.ts | 21 ++++--- .../agent-runtime/utils/streams/ollama.ts | 16 +---- .../agent-runtime/utils/streams/openai.ts | 13 +--- .../agent-runtime/utils/streams/protocol.ts | 14 +++++ 6 files changed, 41 insertions(+), 99 deletions(-) diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index ff190340d12b..d60f3362470e 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -8,6 +8,7 @@ import { StreamProtocolToolCallChunk, StreamStack, StreamToolCallChunkData, + createSSEProtocolTransformer, } from './protocol'; export const transformAnthropicStream = ( @@ -84,16 +85,6 @@ export const AnthropicStream = ( stream instanceof ReadableStream ? stream : readableFromAsyncIterable(chatStreamable(stream)); return readableStream - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformAnthropicStream(chunk, streamStack); - - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack)) .pipeThrough(createCallbacksTransformer(callbacks)); }; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts index 46e36a7bdcf6..92a6bfd10c8d 100644 --- a/src/libs/agent-runtime/utils/streams/google-ai.ts +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -9,13 +9,16 @@ import { nanoid } from '@/utils/uuid'; import { ChatStreamCallbacks } from '../../types'; import { StreamProtocolChunk, + StreamStack, StreamToolCallChunkData, chatStreamable, + createSSEProtocolTransformer, generateToolCallId, } from './protocol'; const transformGoogleGenerativeAIStream = ( chunk: EnhancedGenerateContentResponse, + stack: StreamStack, ): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices const functionCalls = chunk.functionCalls(); @@ -33,7 +36,7 @@ const transformGoogleGenerativeAIStream = ( type: 'function', }), ), - id: nanoid(), + id: stack.id, type: 'tool_calls', }; } @@ -41,48 +44,9 @@ const transformGoogleGenerativeAIStream = ( return { data: text, - id: nanoid(), + id: stack?.id, type: 'text', }; - // if (typeof item.delta?.content === 'string') { - // return { data: item.delta.content, id: chunk.id, type: 'text' }; - // } - // - // if (item.delta?.tool_calls) { - // return { - // data: item.delta.tool_calls.map((value, index) => ({ - // ...value, - // - // // mistral's tool calling don't have index and function field, it's data like: - // // [{"id":"xbhnmTtY7","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"A photo of a small, fluffy dog with a playful expression and wagging tail.\", \"A watercolor painting of a small, energetic dog with a glossy coat and bright eyes.\", \"A vector illustration of a small, adorable dog with a short snout and perky ears.\", \"A drawing of a small, scruffy dog with 
a mischievous grin and a wagging tail.\"], \"quality\": \"standard\", \"seeds\": [123456, 654321, 111222, 333444], \"size\": \"1024x1024\", \"style\": \"vivid\"}"}}] - // - // // minimax's tool calling don't have index field, it's data like: - // // [{"id":"call_function_4752059746","type":"function","function":{"name":"lobe-image-designer____text2image____builtin","arguments":"{\"prompts\": [\"一个流浪的地球,背景是浩瀚"}}] - // - // // so we need to add these default values - // index: typeof value.index !== 'undefined' ? value.index : index, - // type: value.type || 'function', - // })), - // id: chunk.id, - // type: 'tool_calls', - // }; - // } - // - // // 给定结束原因 - // if (item.finish_reason) { - // return { data: item.finish_reason, id: chunk.id, type: 'stop' }; - // } - // - // if (item.delta.content === null) { - // return { data: item.delta, id: chunk.id, type: 'data' }; - // } - // - // // 其余情况下,返回 delta 和 index - // return { - // data: { delta: item.delta, id: chunk.id, index: item.index }, - // id: chunk.id, - // type: 'data', - // }; }; // only use for debug @@ -94,17 +58,10 @@ export const googleGenAIResultToStream = (stream: GenerateContentStreamResult) = export const GoogleGenerativeAIStream = ( rawStream: ReadableStream, callbacks?: ChatStreamCallbacks, -) => - rawStream - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformGoogleGenerativeAIStream(chunk); +) => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + return rawStream + .pipeThrough(createSSEProtocolTransformer(transformGoogleGenerativeAIStream, streamStack)) .pipeThrough(createCallbacksTransformer(callbacks)); +}; diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts index cb53534d6468..a517b9f794e4 100644 --- a/src/libs/agent-runtime/utils/streams/minimax.ts +++ b/src/libs/agent-runtime/utils/streams/minimax.ts @@ -3,6 +3,7 @@ import OpenAI from 'openai'; import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './openai'; +import { createSSEProtocolTransformer } from './protocol'; const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => { const decoder = new TextDecoder(); @@ -17,22 +18,22 @@ const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChu chunkValue = chunkValue.slice(5).trim(); } - return JSON.parse(chunkValue); + try { + return JSON.parse(chunkValue); + } catch (e) { + console.error('minimax chunk parse error:', e); + + return { raw: chunkValue } as any; + } }; export const MinimaxStream = (stream: ReadableStream, callbacks?: ChatStreamCallbacks) => { return stream .pipeThrough( - new TransformStream({ - transform: (buffer, controller) => { - const chunk = unit8ArrayToJSONChunk(buffer); - - const { type, id, data } = transformOpenAIStream(chunk); + createSSEProtocolTransformer((buffer) => { + const chunk = unit8ArrayToJSONChunk(buffer); - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, + return transformOpenAIStream(chunk); }), ) .pipeThrough(createCallbacksTransformer(callbacks)); diff --git a/src/libs/agent-runtime/utils/streams/ollama.ts b/src/libs/agent-runtime/utils/streams/ollama.ts index c0f5b4cc7c60..728b78dedff2 100644 --- 
a/src/libs/agent-runtime/utils/streams/ollama.ts +++ b/src/libs/agent-runtime/utils/streams/ollama.ts @@ -1,5 +1,3 @@ -// copy from https://github.com/vercel/ai/discussions/539#discussioncomment-8193721 -// and I have remove the unnecessary code import { type AIStreamCallbacksAndOptions, createCallbacksTransformer, @@ -9,7 +7,7 @@ import { ChatResponse } from 'ollama/browser'; import { nanoid } from '@/utils/uuid'; -import { StreamProtocolChunk, StreamStack } from './protocol'; +import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from './protocol'; const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices @@ -33,16 +31,6 @@ export const OllamaStream = ( const streamStack: StreamStack = { id: 'chat_' + nanoid() }; return readableFromAsyncIterable(chatStreamable(res)) - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformOllamaStream(chunk, streamStack); - - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + .pipeThrough(createSSEProtocolTransformer(transformOllamaStream, streamStack)) .pipeThrough(createCallbacksTransformer(cb) as any); }; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 3695c6505d7a..908c6575a6fd 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -7,6 +7,7 @@ import { StreamProtocolChunk, StreamProtocolToolCallChunk, StreamToolCallChunkData, + createSSEProtocolTransformer, generateToolCallId, } from './protocol'; @@ -72,16 +73,6 @@ export const OpenAIStream = ( stream instanceof ReadableStream ? 
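// For reference, each StreamProtocolChunk is serialized by the shared
// createSSEProtocolTransformer into a three-line SSE frame. A text delta of
// "hello" on an illustrative message id chat_abc goes over the wire as:
//   id: chat_abc
//   event: text
//   data: "hello"
// followed by a blank line, the standard SSE event delimiter.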
stream : readableFromAsyncIterable(chatStreamable(stream)); return readableStream - .pipeThrough( - new TransformStream({ - transform: (chunk, controller) => { - const { type, id, data } = transformOpenAIStream(chunk); - - controller.enqueue(`id: ${id}\n`); - controller.enqueue(`event: ${type}\n`); - controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); - }, - }), - ) + .pipeThrough(createSSEProtocolTransformer(transformOpenAIStream)) .pipeThrough(createCallbacksTransformer(callbacks)); }; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 2746d4f736b7..408220fbb77d 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -33,3 +33,17 @@ export const chatStreamable = async function* (stream: AsyncIterable) { yield response; } }; + +export const createSSEProtocolTransformer = ( + transformer: (chunk: any, stack: StreamStack) => StreamProtocolChunk, + streamStack?: StreamStack, +) => + new TransformStream({ + transform: (chunk, controller) => { + const { type, id, data } = transformer(chunk, streamStack || { id: '' }); + + controller.enqueue(`id: ${id}\n`); + controller.enqueue(`event: ${type}\n`); + controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); + }, + }); From 83cf6ecee95d8b26343e544f90a40064f662104a Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 21:39:55 +0800 Subject: [PATCH 13/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20the=20bedrock=20stream?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/anthropic/index.ts | 10 +--- src/libs/agent-runtime/bedrock/index.ts | 42 +++++++++-------- .../agent-runtime/utils/anthropicHelpers.ts | 10 ++++ .../utils/streams/bedrock/claude.ts | 21 +++++++++ .../utils/streams/bedrock/common.ts | 32 +++++++++++++ .../utils/streams/bedrock/index.ts | 3 ++ .../utils/streams/bedrock/llama.ts | 46 +++++++++++++++++++ src/libs/agent-runtime/utils/streams/index.ts | 1 + 8 files changed, 139 insertions(+), 26 deletions(-) create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/claude.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/common.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/index.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/llama.ts diff --git a/src/libs/agent-runtime/anthropic/index.ts b/src/libs/agent-runtime/anthropic/index.ts index 936ab9b2a9eb..d4a969ffda67 100644 --- a/src/libs/agent-runtime/anthropic/index.ts +++ b/src/libs/agent-runtime/anthropic/index.ts @@ -9,7 +9,7 @@ import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../typ import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; import { desensitizeUrl } from '../utils/desensitizeUrl'; -import { buildAnthropicMessages } from '../utils/anthropicHelpers'; +import { buildAnthropicMessages, buildAnthropicTools } from '../utils/anthropicHelpers'; import { StreamingResponse } from '../utils/response'; import { AnthropicStream } from '../utils/streams'; @@ -120,13 +120,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI { temperature, // TODO: Anthropic sdk don't have tools interface currently // @ts-ignore - tools: tools?.map( - (tool): Anthropic.Beta.Tools.Tool => ({ - description: tool.function.description, - input_schema: tool.function.parameters as Anthropic.Beta.Tools.Tool.InputSchema, - name: 
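// The buildAnthropicTools helper introduced here keeps the exact shape of the
// inline mapping it replaces: an OpenAI tool definition such as
//   { type: 'function', function: { name: 'search', description: '...', parameters: { type: 'object' } } }
// (an illustrative example) becomes the Anthropic tool
//   { name: 'search', description: '...', input_schema: { type: 'object' } }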
tool.function.name, - }), - ), + tools: buildAnthropicTools(tools), top_p, } satisfies Anthropic.MessageCreateParams; } diff --git a/src/libs/agent-runtime/bedrock/index.ts b/src/libs/agent-runtime/bedrock/index.ts index 1f1a88ca17d0..ff940b0bd44e 100644 --- a/src/libs/agent-runtime/bedrock/index.ts +++ b/src/libs/agent-runtime/bedrock/index.ts @@ -2,7 +2,6 @@ import { BedrockRuntimeClient, InvokeModelWithResponseStreamCommand, } from '@aws-sdk/client-bedrock-runtime'; -import { AWSBedrockLlama2Stream, AWSBedrockStream, StreamingTextResponse } from 'ai'; import { experimental_buildLlama2Prompt } from 'ai/prompts'; import { LobeRuntimeAI } from '../BaseAI'; @@ -11,6 +10,12 @@ import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../typ import { buildAnthropicMessages } from '../utils/anthropicHelpers'; import { AgentRuntimeError } from '../utils/createError'; import { debugStream } from '../utils/debugStream'; +import { StreamingResponse } from '../utils/response'; +import { + AWSBedrockClaudeStream, + AWSBedrockLlamaStream, + createBedrockStream, +} from '../utils/streams'; export interface LobeBedrockAIParams { accessKeyId?: string; @@ -39,7 +44,7 @@ export class LobeBedrockAI implements LobeRuntimeAI { } async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) { - if (payload.model.startsWith('meta')) return this.invokeLlamaModel(payload); + if (payload.model.startsWith('meta')) return this.invokeLlamaModel(payload, options); return this.invokeClaudeModel(payload, options); } @@ -47,7 +52,7 @@ export class LobeBedrockAI implements LobeRuntimeAI { private invokeClaudeModel = async ( payload: ChatStreamPayload, options?: ChatCompetitionOptions, - ): Promise<StreamingTextResponse> => { + ): Promise<Response> => { const { max_tokens, messages, model, temperature, top_p } = payload; const system_message = messages.find((m) => m.role === 'system'); const user_messages = messages.filter((m) => m.role !== 'system'); @@ -68,23 +73,20 @@ export class LobeBedrockAI implements LobeRuntimeAI { try { // Ask Claude for a streaming chat completion given the prompt - const bedrockResponse = await this.client.send(command, { abortSignal: options?.signal }); + const res = await this.client.send(command, { abortSignal: options?.signal }); - // Convert the response into a friendly text-stream - const stream = AWSBedrockStream( - bedrockResponse, - options?.callback, - (chunk) => chunk.delta?.text, - ); + const claudeStream = createBedrockStream(res); - const [debug, output] = stream.tee(); + const [prod, debug] = claudeStream.tee(); if (process.env.DEBUG_BEDROCK_CHAT_COMPLETION === '1') { debugStream(debug).catch(console.error); } // Respond with the stream - return new StreamingTextResponse(output); + return StreamingResponse(AWSBedrockClaudeStream(prod, options?.callback), { + headers: options?.headers, + }); } catch (e) { const err = e as Error & { $metadata: any }; @@ -101,7 +103,10 @@ export class LobeBedrockAI implements LobeRuntimeAI { } }; - private invokeLlamaModel = async (payload: ChatStreamPayload): Promise<StreamingTextResponse> => { + private invokeLlamaModel = async ( + payload: ChatStreamPayload, + options?: ChatCompetitionOptions, + ): Promise<Response> => { const { max_tokens, messages, model } = payload; const command = new InvokeModelWithResponseStreamCommand({ accept: 'application/json', @@ -115,18 +120,19 @@ export class LobeBedrockAI implements LobeRuntimeAI { try { // Ask the Llama model for a streaming chat completion given the prompt - const bedrockResponse = await
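// Note the tee() pattern used by both invocations: the Bedrock response stream
// is split into a `prod` branch, which is piped through the protocol transformer
// and returned to the caller, and a `debug` branch, which is only drained when
// DEBUG_BEDROCK_CHAT_COMPLETION=1, so debug logging never consumes the stream
// the client sees.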
this.client.send(command); - // Convert the response into a friendly text-stream - const stream = AWSBedrockLlama2Stream(bedrockResponse); + const stream = createBedrockStream(res); - const [debug, output] = stream.tee(); + const [prod, debug] = stream.tee(); if (process.env.DEBUG_BEDROCK_CHAT_COMPLETION === '1') { debugStream(debug).catch(console.error); } // Respond with the stream - return new StreamingTextResponse(output); + return StreamingResponse(AWSBedrockLlamaStream(prod, options?.callback), { + headers: options?.headers, + }); } catch (e) { const err = e as Error & { $metadata: any }; diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.ts b/src/libs/agent-runtime/utils/anthropicHelpers.ts index 02779fddd05d..1617622f38e1 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.ts @@ -1,4 +1,5 @@ import Anthropic from '@anthropic-ai/sdk'; +import OpenAI from 'openai'; import { OpenAIChatMessage, UserMessageContentPart } from '../types'; import { parseDataUri } from './uriParser'; @@ -108,3 +109,12 @@ export const buildAnthropicMessages = ( return messages; }; + +export const buildAnthropicTools = (tools?: OpenAI.ChatCompletionTool[]) => + tools?.map( + (tool): Anthropic.Beta.Tools.Tool => ({ + description: tool.function.description, + input_schema: tool.function.parameters as Anthropic.Beta.Tools.Tool.InputSchema, + name: tool.function.name, + }), + ); diff --git a/src/libs/agent-runtime/utils/streams/bedrock/claude.ts b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts new file mode 100644 index 000000000000..bfd2945b8329 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts @@ -0,0 +1,21 @@ +import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; +import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; + +import { nanoid } from '@/utils/uuid'; + +import { transformAnthropicStream } from '../anthropic'; +import { StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { createBedrockStream } from './common'; + +export const AWSBedrockClaudeStream = ( + res: InvokeModelWithResponseStreamResponse | ReadableStream, + cb?: AIStreamCallbacksAndOptions, +): ReadableStream => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; + + const stream = res instanceof ReadableStream ? 
res : createBedrockStream(res); + + return stream + .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack)) + .pipeThrough(createCallbacksTransformer(cb) as any); +}; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/common.ts new file mode 100644 index 000000000000..dbc93b7637c3 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/common.ts @@ -0,0 +1,32 @@ +import { + InvokeModelWithResponseStreamResponse, + ResponseStream, +} from '@aws-sdk/client-bedrock-runtime'; +import { readableFromAsyncIterable } from 'ai'; + +const chatStreamable = async function* (stream: AsyncIterable<ResponseStream>) { + for await (const response of stream) { + if (response.chunk) { + const decoder = new TextDecoder(); + + const value = decoder.decode(response.chunk.bytes, { stream: true }); + try { + const chunk = JSON.parse(value); + + yield chunk; + } catch (e) { + console.error('bedrock stream parser error:', e); + + yield value; + } + } else { + yield response; + } + } +}; + +/** + * convert the Bedrock response to a readable stream + */ +export const createBedrockStream = (res: InvokeModelWithResponseStreamResponse) => + readableFromAsyncIterable(chatStreamable(res.body!)); diff --git a/src/libs/agent-runtime/utils/streams/bedrock/index.ts new file mode 100644 index 000000000000..a25c3d7a6b43 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/index.ts @@ -0,0 +1,3 @@ +export * from './claude'; +export * from './common'; +export * from './llama'; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/llama.ts new file mode 100644 index 000000000000..adec93f736a5 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/llama.ts @@ -0,0 +1,46 @@ +import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; +import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; + +import { nanoid } from '@/utils/uuid'; + +import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { createBedrockStream } from './common'; + +interface AmazonBedrockInvocationMetrics { + firstByteLatency: number; + inputTokenCount: number; + invocationLatency: number; + outputTokenCount: number; +} +interface BedrockLlamaStreamChunk { + 'amazon-bedrock-invocationMetrics'?: AmazonBedrockInvocationMetrics; + 'generation': string; + 'generation_token_count': number; + 'prompt_token_count'?: number | null; + 'stop_reason'?: null | 'stop' | string; +} + +export const transformLlamaStream = ( + chunk: BedrockLlamaStreamChunk, + stack: StreamStack, +): StreamProtocolChunk => { + // maybe need another structure to add support for multiple choices + if (chunk.stop_reason) { + return { data: 'finished', id: stack.id, type: 'stop' }; + } + + return { data: chunk.generation, id: stack.id, type: 'text' }; +}; + +export const AWSBedrockLlamaStream = ( + res: InvokeModelWithResponseStreamResponse | ReadableStream, + cb?: AIStreamCallbacksAndOptions, +): ReadableStream => { + const streamStack: StreamStack = { id: 'chat_' + nanoid() }; + + const stream = res instanceof ReadableStream ? 
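// A worked example for transformLlamaStream (payloads are illustrative): a decoded
// chunk { generation: 'Hello', generation_token_count: 1, stop_reason: null }
// maps to { data: 'Hello', id: 'chat_<nanoid>', type: 'text' }, while a final
// chunk carrying stop_reason: 'stop' maps to
// { data: 'finished', id: 'chat_<nanoid>', type: 'stop' }.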
res : createBedrockStream(res); + + return stream + .pipeThrough(createSSEProtocolTransformer(transformLlamaStream, streamStack)) + .pipeThrough(createCallbacksTransformer(cb) as any); +}; diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 0a9277369426..6601b493fce8 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,4 +1,5 @@ export * from './anthropic'; +export * from './bedrock'; export * from './google-ai'; export * from './ollama'; export * from './openai'; From fb59d4f7faf0bc091a79539a9241990681c88dcd Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 21:48:53 +0800 Subject: [PATCH 14/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20remove?= =?UTF-8?q?=20Stream=20Text=20Response?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/BaseAI.ts | 11 ++--------- src/libs/agent-runtime/utils/streams/index.ts | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/libs/agent-runtime/BaseAI.ts b/src/libs/agent-runtime/BaseAI.ts index 44ba7e20d16d..301dc6523ba8 100644 --- a/src/libs/agent-runtime/BaseAI.ts +++ b/src/libs/agent-runtime/BaseAI.ts @@ -1,4 +1,3 @@ -import { StreamingTextResponse } from 'ai'; import OpenAI from 'openai'; import { ChatModelCard } from '@/types/llm'; @@ -7,10 +6,7 @@ import { ChatCompetitionOptions, ChatStreamPayload } from './types'; export interface LobeRuntimeAI { baseURL?: string; - chat( - payload: ChatStreamPayload, - options?: ChatCompetitionOptions, - ): Promise; + chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions): Promise; models?(): Promise; } @@ -19,10 +15,7 @@ export abstract class LobeOpenAICompatibleRuntime { abstract baseURL: string; abstract client: OpenAI; - abstract chat( - payload: ChatStreamPayload, - options?: ChatCompetitionOptions, - ): Promise; + abstract chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions): Promise; abstract models(): Promise; } diff --git a/src/libs/agent-runtime/utils/streams/index.ts b/src/libs/agent-runtime/utils/streams/index.ts index 6601b493fce8..deb4a934c541 100644 --- a/src/libs/agent-runtime/utils/streams/index.ts +++ b/src/libs/agent-runtime/utils/streams/index.ts @@ -1,7 +1,7 @@ export * from './anthropic'; export * from './bedrock'; export * from './google-ai'; +export * from './minimax'; export * from './ollama'; export * from './openai'; -export * from './minimax'; export * from './protocol'; From dc4d0c2cac125c9c85a2b8b09a7a4b136c9b4346 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Fri, 10 May 2024 22:38:30 +0800 Subject: [PATCH 15/24] =?UTF-8?q?=E2=9C=85=20test:=20clean=20some=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/groq/index.test.ts | 74 +--- src/libs/agent-runtime/minimax/index.test.ts | 4 +- src/libs/agent-runtime/mistral/index.test.ts | 53 --- src/libs/agent-runtime/moonshot/index.test.ts | 72 +--- src/libs/agent-runtime/openai/index.test.ts | 53 --- .../agent-runtime/openrouter/index.test.ts | 54 +-- .../agent-runtime/perplexity/index.test.ts | 71 ---- src/libs/agent-runtime/perplexity/index.ts | 5 +- .../agent-runtime/togetherai/index.test.ts | 54 +-- .../utils/anthropicHelpers.test.ts | 2 +- .../openaiCompatibleFactory/index.test.ts | 382 ++++++++++++++++++ .../utils/openaiCompatibleFactory/index.ts | 5 +- src/libs/agent-runtime/zeroone/index.test.ts | 54 +-- 
src/libs/agent-runtime/zhipu/index.test.ts | 2 +- 14 files changed, 398 insertions(+), 487 deletions(-) create mode 100644 src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts diff --git a/src/libs/agent-runtime/groq/index.test.ts b/src/libs/agent-runtime/groq/index.test.ts index 67779be2eb26..e8363720fdb0 100644 --- a/src/libs/agent-runtime/groq/index.test.ts +++ b/src/libs/agent-runtime/groq/index.test.ts @@ -40,25 +40,7 @@ describe('LobeGroqAI', () => { }); describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - - it('should call OpenRouter API with corresponding options', async () => { + it('should call chat with corresponding options', async () => { // Arrange const mockStream = new ReadableStream(); const mockResponse = Promise.resolve(mockStream); @@ -78,6 +60,7 @@ describe('LobeGroqAI', () => { expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( { max_tokens: 1024, + stream: true, messages: [{ content: 'Hello', role: 'user' }], model: 'mistralai/mistral-7b-instruct:free', temperature: 0.7, @@ -253,59 +236,6 @@ describe('LobeGroqAI', () => { }); }); - describe('LobeGroqAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'mistralai/mistral-7b-instruct:free', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_OPENROUTER_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/minimax/index.test.ts b/src/libs/agent-runtime/minimax/index.test.ts index 49c9ac5c3edd..efef06500edf 100644 --- a/src/libs/agent-runtime/minimax/index.test.ts +++ b/src/libs/agent-runtime/minimax/index.test.ts @@ -62,7 +62,7 @@ describe('LobeMinimaxAI', () => { temperature: 0, }); - expect(result).toBeInstanceOf(StreamingTextResponse); + 
expect(result).toBeInstanceOf(Response); }); it('should handle text messages correctly', async () => { @@ -85,7 +85,7 @@ describe('LobeMinimaxAI', () => { temperature: 0, }); - expect(result).toBeInstanceOf(StreamingTextResponse); + expect(result).toBeInstanceOf(Response); }); it('should call debugStream in DEBUG mode', async () => { diff --git a/src/libs/agent-runtime/mistral/index.test.ts b/src/libs/agent-runtime/mistral/index.test.ts index 1250d69ddc8b..7cb12c068fa8 100644 --- a/src/libs/agent-runtime/mistral/index.test.ts +++ b/src/libs/agent-runtime/mistral/index.test.ts @@ -287,59 +287,6 @@ describe('LobeMistralAI', () => { }); }); - describe('LobeMistralAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'open-mistral-7b', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'open-mistral-7b', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_MISTRAL_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/moonshot/index.test.ts b/src/libs/agent-runtime/moonshot/index.test.ts index 16c0e69e319d..6de7a29cb2c8 100644 --- a/src/libs/agent-runtime/moonshot/index.test.ts +++ b/src/libs/agent-runtime/moonshot/index.test.ts @@ -40,24 +40,6 @@ describe('LobeMoonshotAI', () => { }); describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - describe('Error', () => { it('should return OpenAIBizError with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange @@ -223,59 +205,6 @@ describe('LobeMoonshotAI', () => { }); }); - describe('LobeMoonshotAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) 
{ - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'gpt-3.5-turbo-0125', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_MOONSHOT_CHAT_COMPLETION is 1', async () => { // Arrange @@ -306,6 +235,7 @@ describe('LobeMoonshotAI', () => { await instance.chat({ messages: [{ content: 'Hello', role: 'user' }], model: 'text-davinci-003', + stream: true, temperature: 0, }); diff --git a/src/libs/agent-runtime/openai/index.test.ts b/src/libs/agent-runtime/openai/index.test.ts index 6a4e8c9e508c..ef1591128314 100644 --- a/src/libs/agent-runtime/openai/index.test.ts +++ b/src/libs/agent-runtime/openai/index.test.ts @@ -190,59 +190,6 @@ describe('LobeOpenAI', () => { }); }); - describe('LobeOpenAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'gpt-3.5-turbo-0125', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_OPENAI_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/openrouter/index.test.ts b/src/libs/agent-runtime/openrouter/index.test.ts index 306c9126af4c..96752c349b43 100644 --- a/src/libs/agent-runtime/openrouter/index.test.ts +++ b/src/libs/agent-runtime/openrouter/index.test.ts @@ -81,6 +81,7 @@ describe('LobeOpenRouterAI', () => 
{ { max_tokens: 1024, messages: [{ content: 'Hello', role: 'user' }], + stream: true, model: 'mistralai/mistral-7b-instruct:free', temperature: 0.7, top_p: 1, @@ -255,59 +256,6 @@ describe('LobeOpenRouterAI', () => { }); }); - describe('LobeOpenRouterAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'mistralai/mistral-7b-instruct:free', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_OPENROUTER_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/perplexity/index.test.ts b/src/libs/agent-runtime/perplexity/index.test.ts index 07e29fa5eaa3..6a7d0e8d99e6 100644 --- a/src/libs/agent-runtime/perplexity/index.test.ts +++ b/src/libs/agent-runtime/perplexity/index.test.ts @@ -40,24 +40,6 @@ describe('LobePerplexityAI', () => { }); describe('chat', () => { - it('should return a StreamingTextResponse on successful API call', async () => { - // Arrange - const mockStream = new ReadableStream(); - const mockResponse = Promise.resolve(mockStream); - - (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); - - // Act - const result = await instance.chat({ - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }); - - // Assert - expect(result).toBeInstanceOf(Response); - }); - describe('Error', () => { it('should return OpenAIBizError with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange @@ -223,59 +205,6 @@ describe('LobePerplexityAI', () => { }); }); - describe('LobePerplexityAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'gpt-3.5-turbo-0125', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 
callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'text-davinci-003', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_PERPLEXITY_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/perplexity/index.ts b/src/libs/agent-runtime/perplexity/index.ts index 302618f8096e..446e4389ad43 100644 --- a/src/libs/agent-runtime/perplexity/index.ts +++ b/src/libs/agent-runtime/perplexity/index.ts @@ -9,7 +9,7 @@ export const LobePerplexityAI = LobeOpenAICompatibleFactory({ chatCompletion: { handlePayload: (payload: ChatStreamPayload) => { // Set a default frequency penalty value greater than 0 - const { presence_penalty, frequency_penalty, ...res } = payload; + const { presence_penalty, frequency_penalty, stream = true, ...res } = payload; let param; @@ -22,8 +22,7 @@ export const LobePerplexityAI = LobeOpenAICompatibleFactory({ param = { frequency_penalty: frequency_penalty || defaultFrequencyPenalty }; } - console.log(param); - return { ...res, ...param } as OpenAI.ChatCompletionCreateParamsStreaming; + return { ...res, ...param, stream } as OpenAI.ChatCompletionCreateParamsStreaming; }, }, debug: { diff --git a/src/libs/agent-runtime/togetherai/index.test.ts b/src/libs/agent-runtime/togetherai/index.test.ts index 80e22bc13487..e13d62375ea2 100644 --- a/src/libs/agent-runtime/togetherai/index.test.ts +++ b/src/libs/agent-runtime/togetherai/index.test.ts @@ -81,6 +81,7 @@ describe('LobeTogetherAI', () => { messages: [{ content: 'Hello', role: 'user' }], model: 'mistralai/mistral-7b-instruct:free', temperature: 0.7, + stream: true, top_p: 1, }, { headers: { Accept: '*/*' } }, @@ -253,59 +254,6 @@ describe('LobeTogetherAI', () => { }); }); - describe('LobeTogetherAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'mistralai/mistral-7b-instruct:free', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'mistralai/mistral-7b-instruct:free', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - 
expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_TOGETHERAI_CHAT_COMPLETION is 1', async () => { // Arrange diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts index 8cf8d7f5e615..8f15e880c8c9 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts @@ -49,7 +49,7 @@ describe('anthropicHelpers', () => { role: 'system', }; const result = buildAnthropicMessage(message); - expect(result).toEqual({ content: [{ type: 'text', text: 'Hello!' }], role: 'assistant' }); + expect(result).toEqual({ content: [{ type: 'text', text: 'Hello!' }], role: 'user' }); }); }); diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts new file mode 100644 index 000000000000..621bd94f8583 --- /dev/null +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts @@ -0,0 +1,382 @@ +// @vitest-environment node +import OpenAI from 'openai'; +import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { + AgentRuntimeErrorType, + ChatStreamCallbacks, + LobeOpenAICompatibleRuntime, + ModelProvider, +} from '@/libs/agent-runtime'; + +import * as debugStreamModule from '../debugStream'; +import { LobeOpenAICompatibleFactory } from './index'; + +const provider = 'groq'; +const defaultBaseURL = 'https://api.groq.com/openai/v1'; +const bizErrorType = 'GroqBizError'; +const invalidErrorType = 'InvalidGroqAPIKey'; + +// Mock the console.error to avoid polluting test output +vi.spyOn(console, 'error').mockImplementation(() => {}); + +let instance: LobeOpenAICompatibleRuntime; + +const LobeMockProvider = LobeOpenAICompatibleFactory({ + baseURL: defaultBaseURL, + chatCompletion: { + handleError: (error) => { + // 403 means the location is not supported + if (error.status === 403) + return { error, errorType: AgentRuntimeErrorType.LocationNotSupportError }; + }, + handlePayload: (payload) => { + return { + ...payload, + // disable stream for tools, since Groq does not support streaming with tools yet + stream: !payload.tools, + } as any; + }, + }, + debug: { + chatCompletion: () => process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION === '1', + }, + errorType: { + bizError: AgentRuntimeErrorType.GroqBizError, + invalidAPIKey: AgentRuntimeErrorType.InvalidGroqAPIKey, + }, + provider: ModelProvider.Groq, +}); + +beforeEach(() => { + instance = new LobeMockProvider({ apiKey: 'test' }); + + // use vi.spyOn to mock the chat.completions.create method + vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue( + new ReadableStream() as any, + ); +}); + +afterEach(() => { + vi.clearAllMocks(); +}); + +describe('LobeOpenAICompatibleFactory', () => { + describe('init', () => { + it('should correctly initialize with an API key', async () => { + const instance = new LobeMockProvider({ apiKey: 'test_api_key' }); + expect(instance).toBeInstanceOf(LobeMockProvider); + expect(instance.baseURL).toEqual(defaultBaseURL); + }); + }); + + describe('chat', () => { + it('should return a Response on successful API call', async () => { + // Arrange + const mockStream = new ReadableStream();
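// With the handlePayload above, a payload that carries `tools` is sent with
// stream: false while a plain chat payload keeps stream: true, e.g. (searchTool
// is a hypothetical tool definition):
//   handlePayload({ messages, model, tools: [searchTool] }) // -> { ..., stream: false }
//   handlePayload({ messages, model })                      // -> { ..., stream: true }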
const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + + // Assert + expect(result).toBeInstanceOf(Response); + }); + + it('should call chat API with corresponding options', async () => { + // Arrange + const mockStream = new ReadableStream(); + const mockResponse = Promise.resolve(mockStream); + + (instance['client'].chat.completions.create as Mock).mockResolvedValue(mockResponse); + + // Act + const result = await instance.chat({ + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0.7, + top_p: 1, + }); + + // Assert + expect(instance['client'].chat.completions.create).toHaveBeenCalledWith( + { + max_tokens: 1024, + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0.7, + stream: true, + top_p: 1, + }, + { headers: { Accept: '*/*' } }, + ); + expect(result).toBeInstanceOf(Response); + }); + + describe('Error', () => { + it('should return bizErrorType with an openai error response when OpenAI.APIError is thrown', async () => { + // Arrange + const apiError = new OpenAI.APIError( + 400, + { + status: 400, + error: { + message: 'Bad Request', + }, + }, + 'Error message', + {}, + ); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: { + error: { message: 'Bad Request' }, + status: 400, + }, + errorType: bizErrorType, + provider, + }); + } + }); + + it('should throw AgentRuntimeError with invalidErrorType if no apiKey is provided', async () => { + try { + new LobeMockProvider({}); + } catch (e) { + expect(e).toEqual({ errorType: invalidErrorType }); + } + }); + + it('should return bizErrorType with the cause when OpenAI.APIError is thrown with cause', async () => { + // Arrange + const errorInfo = { + stack: 'abc', + cause: { + message: 'api is undefined', + }, + }; + const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: { + cause: { message: 'api is undefined' }, + stack: 'abc', + }, + errorType: bizErrorType, + provider, + }); + } + }); + + it('should return bizErrorType with a cause response with a desensitized URL', async () => { + // Arrange + const errorInfo = { + stack: 'abc', + cause: { message: 'api is undefined' }, + }; + const apiError = new OpenAI.APIError(400, errorInfo, 'module error', {}); + + instance = new LobeMockProvider({ + apiKey: 'test', + + baseURL: 'https://api.abc.com/v1', + }); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(apiError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ 
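// The masked endpoint asserted below is produced by the desensitizeUrl util: a
// custom baseURL such as 'https://api.abc.com/v1' is reported in the error
// payload as 'https://api.***.com/v1', so user-configured hosts are not leaked.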
endpoint: 'https://api.***.com/v1', + error: { + cause: { message: 'api is undefined' }, + stack: 'abc', + }, + errorType: bizErrorType, + provider, + }); + } + }); + + it('should throw an invalid API key error type on 401 status code', async () => { + // Mock the API call to simulate a 401 error + const error = new Error('Unauthorized') as any; + error.status = 401; + vi.mocked(instance['client'].chat.completions.create).mockRejectedValue(error); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + // Expect the chat method to throw an error with the configured invalidErrorType + expect(e).toEqual({ + endpoint: defaultBaseURL, + error: new Error('Unauthorized'), + errorType: invalidErrorType, + provider, + }); + } + }); + + it('should return AgentRuntimeError for non-OpenAI errors', async () => { + // Arrange + const genericError = new Error('Generic Error'); + + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(genericError); + + // Act + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + endpoint: defaultBaseURL, + errorType: 'AgentRuntimeError', + provider, + error: { + name: genericError.name, + cause: genericError.cause, + message: genericError.message, + stack: genericError.stack, + }, + }); + } + }); + }); + + describe('chat with callback and headers', () => { + it('should handle callback and headers correctly', async () => { + // mock chat.completions.create to return a readable stream + const mockCreateMethod = vi + .spyOn(instance['client'].chat.completions, 'create') + .mockResolvedValue( + new ReadableStream({ + start(controller) { + controller.enqueue({ + id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', + object: 'chat.completion.chunk', + created: 1709125675, + model: 'mistralai/mistral-7b-instruct:free', + system_fingerprint: 'fp_86156a94a0', + choices: [ + { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, + ], + }); + controller.close(); + }, + }) as any, + ); + + // prepare the callback and headers + const mockCallback: ChatStreamCallbacks = { + onStart: vi.fn(), + onToken: vi.fn(), + }; + const mockHeaders = { 'Custom-Header': 'TestValue' }; + + // run the test + const result = await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }, + { callback: mockCallback, headers: mockHeaders }, + ); + + // verify the callback was invoked + await result.text(); // ensure the stream is consumed + expect(mockCallback.onStart).toHaveBeenCalled(); + expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); + + // verify the headers were passed through correctly + expect(result.headers.get('Custom-Header')).toEqual('TestValue'); + + // clean up + mockCreateMethod.mockRestore(); + }); + }); + + describe('DEBUG', () => { + it('should call debugStream when DEBUG_MOCKPROVIDER_CHAT_COMPLETION is 1', async () => { + // Arrange + const mockProdStream = new ReadableStream() as any; // mocked prod stream + const mockDebugStream = new ReadableStream({ + start(controller) { + controller.enqueue('Debug stream content'); + controller.close(); + }, + }) as any; + mockDebugStream.toReadableStream = () => mockDebugStream; // add a toReadableStream method + + // mock the chat.completions.create return value, including a mocked tee method + (instance['client'].chat.completions.create as Mock).mockResolvedValue({ + tee: () => [mockProdStream, { 
toReadableStream: () => mockDebugStream }], + }); + + // save the original env var value + const originalDebugValue = process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION; + + // mock the env var + process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION = '1'; + vi.spyOn(debugStreamModule, 'debugStream').mockImplementation(() => Promise.resolve()); + + // run the test + // run the function under test and make sure it calls debugStream when the condition is met + // this hypothetical call may need adjusting to the actual setup + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + + // verify debugStream was called + expect(debugStreamModule.debugStream).toHaveBeenCalled(); + + // restore the original env var value + process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION = originalDebugValue; + }); + }); + }); +}); diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts index dda87d6ffab3..ece97dbf9834 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.ts @@ -73,7 +73,10 @@ export const LobeOpenAICompatibleFactory = ({ try { const postPayload = chatCompletion?.handlePayload ? chatCompletion.handlePayload(payload) - : (payload as unknown as OpenAI.ChatCompletionCreateParamsStreaming); + : ({ + ...payload, + stream: payload.stream ?? true, + } as OpenAI.ChatCompletionCreateParamsStreaming); const response = await this.client.chat.completions.create(postPayload, { // https://github.com/lobehub/lobe-chat/pull/318 diff --git a/src/libs/agent-runtime/zeroone/index.test.ts b/src/libs/agent-runtime/zeroone/index.test.ts index 896e2bc3174e..7089b9a7cddb 100644 --- a/src/libs/agent-runtime/zeroone/index.test.ts +++ b/src/libs/agent-runtime/zeroone/index.test.ts @@ -81,6 +81,7 @@ describe('LobeZeroOneAI', () => { messages: [{ content: 'Hello', role: 'user' }], model: 'yi-34b-chat-0205', temperature: 0.7, + stream: true, top_p: 1, }, { headers: { Accept: '*/*' } }, @@ -253,59 +254,6 @@ describe('LobeZeroOneAI', () => { }); }); - describe('LobeZeroOneAI chat with callback and headers', () => { - it('should handle callback and headers correctly', async () => { - // 模拟 chat.completions.create 方法返回一个可读流 - const mockCreateMethod = vi - .spyOn(instance['client'].chat.completions, 'create') - .mockResolvedValue( - new ReadableStream({ - start(controller) { - controller.enqueue({ - id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', - object: 'chat.completion.chunk', - created: 1709125675, - model: 'yi-34b-chat-0205', - system_fingerprint: 'fp_86156a94a0', - choices: [ - { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null }, - ], - }); - controller.close(); - }, - }) as any, - ); - - // 准备 callback 和 headers - const mockCallback: ChatStreamCallbacks = { - onStart: vi.fn(), - onToken: vi.fn(), - }; - const mockHeaders = { 'Custom-Header': 'TestValue' }; - - // 执行测试 - const result = await instance.chat( - { - messages: [{ content: 'Hello', role: 'user' }], - model: 'yi-34b-chat-0205', - temperature: 0, - }, - { callback: mockCallback, headers: mockHeaders }, - ); - - // 验证 callback 被调用 - await result.text(); // 确保流被消费 - expect(mockCallback.onStart).toHaveBeenCalled(); - expect(mockCallback.onToken).toHaveBeenCalledWith('hello'); - - // 验证 headers 被正确传递 - expect(result.headers.get('Custom-Header')).toEqual('TestValue'); - - // 清理 - mockCreateMethod.mockRestore(); - }); - }); - describe('DEBUG', () => { it('should call debugStream and return StreamingTextResponse when DEBUG_ZEROONE_CHAT_COMPLETION is 1', async () => { // 
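// (When no handlePayload is configured, the factory change in this patch falls
// back to { ...payload, stream: payload.stream ?? true }, so callers that never
// set `stream` still get a streaming completion while an explicit
// stream: false is preserved.)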
Arrange diff --git a/src/libs/agent-runtime/zhipu/index.test.ts b/src/libs/agent-runtime/zhipu/index.test.ts index b385085d1e00..5edd8fcb1a3a 100644 --- a/src/libs/agent-runtime/zhipu/index.test.ts +++ b/src/libs/agent-runtime/zhipu/index.test.ts @@ -61,7 +61,7 @@ describe('LobeZhipuAI', () => { model: 'glm-4', temperature: 0, }); - expect(result).toBeInstanceOf(StreamingTextResponse); + expect(result).toBeInstanceOf(Response); }); it('should handle callback and headers correctly', async () => { From 6c07cf943ddf18724f58f26a54f5a3e2edd75bd1 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 05:54:49 +0000 Subject: [PATCH 16/24] =?UTF-8?q?=F0=9F=97=83=EF=B8=8F=20refactor:=20impor?= =?UTF-8?q?t=20and=20export=20tool=20migration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../client/models/__tests__/message.test.ts | 2 +- src/database/client/schemas/message.ts | 3 +- src/features/Conversation/Actions/index.ts | 2 - .../fixtures/from-v1-to-v5-output.json | 245 ++++++++++++++++++ .../fixtures/function-input-v4.json | 96 +++++++ .../fixtures/function-output-v5.json | 120 +++++++++ src/migrations/FromV4ToV5/index.ts | 58 +++++ src/migrations/FromV4ToV5/migrations.test.ts | 49 ++++ src/migrations/FromV4ToV5/types/v4.ts | 21 ++ src/migrations/FromV4ToV5/types/v5.ts | 27 ++ src/migrations/index.ts | 9 +- src/services/chat.ts | 8 +- src/store/chat/slices/message/action.ts | 2 +- src/store/chat/slices/share/action.test.ts | 4 +- src/store/chat/slices/share/action.ts | 2 +- src/types/llm.ts | 2 +- src/types/message/index.ts | 9 +- 17 files changed, 635 insertions(+), 24 deletions(-) create mode 100644 src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json create mode 100644 src/migrations/FromV4ToV5/fixtures/function-input-v4.json create mode 100644 src/migrations/FromV4ToV5/fixtures/function-output-v5.json create mode 100644 src/migrations/FromV4ToV5/index.ts create mode 100644 src/migrations/FromV4ToV5/migrations.test.ts create mode 100644 src/migrations/FromV4ToV5/types/v4.ts create mode 100644 src/migrations/FromV4ToV5/types/v5.ts diff --git a/src/database/client/models/__tests__/message.test.ts b/src/database/client/models/__tests__/message.test.ts index 4c6402495a71..26687e7c6b0a 100644 --- a/src/database/client/models/__tests__/message.test.ts +++ b/src/database/client/models/__tests__/message.test.ts @@ -264,7 +264,7 @@ describe('MessageModel', () => { it('should update a role and plugins', async () => { const createdMessage = await MessageModel.create(messageData); const updateData = { - role: 'function' as const, + role: 'tool' as const, plugin: { apiName: 'a', identifier: 'b', arguments: 'abc' }, }; diff --git a/src/database/client/schemas/message.ts b/src/database/client/schemas/message.ts index ba4d2cfe493b..feddb3960eb5 100644 --- a/src/database/client/schemas/message.ts +++ b/src/database/client/schemas/message.ts @@ -1,7 +1,6 @@ /* eslint-disable sort-keys-fix/sort-keys-fix */ import { z } from 'zod'; - const TranslateSchema = z.object({ from: z.string().optional(), to: z.string(), @@ -20,7 +19,7 @@ const ToolCallSchema = PluginSchema.extend({ }); export const DB_MessageSchema = z.object({ - role: z.enum(['user', 'system', 'assistant', 'function', 'tool']), + role: z.enum(['user', 'system', 'assistant', 'tool']), content: z.string(), files: z.array(z.string()).optional(), favorite: z.number().int().min(0).max(1).optional(), diff --git a/src/features/Conversation/Actions/index.ts 
b/src/features/Conversation/Actions/index.ts index dfa380401829..c18508dc5cea 100644 --- a/src/features/Conversation/Actions/index.ts +++ b/src/features/Conversation/Actions/index.ts @@ -8,13 +8,11 @@ import { LLMRoleType } from '@/types/llm'; import { OnActionsClick, RenderAction } from '../types'; import { AssistantActionsBar } from './Assistant'; import { DefaultActionsBar } from './Fallback'; -import { FunctionActionsBar } from './Function'; import { ToolActionsBar } from './Tool'; import { UserActionsBar } from './User'; export const renderActions: Record = { assistant: AssistantActionsBar, - function: FunctionActionsBar, system: DefaultActionsBar, tool: ToolActionsBar, user: UserActionsBar, diff --git a/src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json b/src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json new file mode 100644 index 000000000000..dc0fd7d910e3 --- /dev/null +++ b/src/migrations/FromV4ToV5/fixtures/from-v1-to-v5-output.json @@ -0,0 +1,245 @@ +{ + "exportType": "sessions", + "state": { + "sessions": [ + { + "config": { + "displayMode": "chat", + "historyCount": 1, + "model": "gpt-3.5-turbo", + "params": { + "frequency_penalty": 0, + "presence_penalty": 0, + "temperature": 0.6, + "top_p": 1, + "max_tokens": 2200 + }, + "plugins": [], + "systemRole": "你是一名 Postgresql 的数据库专家。用户是一名数据库小白,你需要使用简单直白的方式告诉用户如何使用 ostgresql 和它相应的 orm 工具 primsa", + "enableCompressThreshold": false, + "enableMaxTokens": false, + "inputTemplate": "" + }, + "id": "06cc3e20-e870-4099-a619-c07a849d742d", + "meta": { + "avatar": "🐘", + "backgroundColor": "rgba(0,0,0,0)", + "title": "Postgresql 指南", + "description": "PostgreSQL 数据库和 Prisma ORM 工具的简单使用指南", + "tags": ["数据库", "PostgreSQL", "教程", "ORM", "Prisma"] + }, + "type": "agent", + "createdAt": 1698463356269, + "group": "default", + "updatedAt": 1698463356269, + "pinned": true + } + ], + "messages": [ + { + "content": "是的,你可以使用 Prisma 来自定义生成 `id` 的逻辑,例如使用 `nanoid` 库来生成唯一标识符。\n\n首先,你需要在项目中安装 `nanoid` 库:\n\n```\nnpm install nanoid\n```\n\n然后,你可以在 Prisma 的模型定义中使用 `@default` 指令,并提供一个自定义的默认值函数来生成 `id`。\n\n```prisma\nmodel Message {\n id String @id @default(dbgenerated(\"nanoid(10)\"))\n\n createdAt DateTime @default(now())\n updatedAt DateTime @updatedAt\n}\n```\n\n在上述模型定义中,我们使用 `dbgenerated` 指令来定义一个自定义的默认值函数。函数 `nanoid(10)` 会生成一个长度为 10 的唯一标识符。\n\n请注意,使用自定义的 `id` 生成逻辑可能会对数据库性能产生一些影响,因为每次创建新记录时都需要执行自定义函数。因此,请确保你的自定义逻辑是高效的,并在实际使用中进行测试和评估性能。", + "id": "n2Tsl3Dm", + "meta": {}, + "parentId": "JSZaJOGj", + "role": "assistant", + "topicId": "CZg3IEdy", + "createdAt": 1698463981313, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698464004494, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "如何指定 自定义函数的引入位置?", + "id": "xPft8Msa", + "meta": {}, + "role": "user", + "topicId": "CZg3IEdy", + "createdAt": 1698464173224, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698464173226 + }, + { + "content": "在 Prisma 中,你可以通过在 `schema.prisma` 文件的顶部使用 `generator` 块来指定自定义函数的引入位置。\n\n例如,假设你想使用 `nanoid` 库来生成自定义的 `id`,你可以按照以下方式配置:\n\n```prisma\ngenerator client {\n provider = \"prisma-client-js\"\n output = \"./node_modules/@prisma/client\"\n}\n\ngenerator dbgenerated {\n provider = \"prisma-dbgenerator\"\n output = \"./node_modules/@prisma/client\"\n binaryTargets = [\"native\"]\n previewFeatures = [\"nativeTypes\"]\n config = {\n \"generator\": {\n \"dbgenerated\": {\n \"output\": \"./src/db/generated\",\n \"provider\": \"./src/db/generated/provider.js\"\n }\n }\n }\n}\n```\n\n在上述配置中,我们定义了两个生成器。`client` 
生成器用于生成 Prisma 客户端代码,而 `dbgenerated` 生成器用于生成数据库相关的代码。\n\n注意到 `dbgenerated` 生成器的 `config` 部分中有一个 `provider` 字段,它指定了自定义函数的引入位置。在这个示例中,我们将自定义函数放在 `./src/db/generated/provider.js` 文件中。\n\n你可以根据你的项目结构和需要来调整路径和文件名,确保指定的 `provider` 路径正确引入了自定义函数。", + "id": "jZc2KKRs", + "meta": {}, + "parentId": "xPft8Msa", + "role": "assistant", + "topicId": "CZg3IEdy", + "createdAt": 1698464173228, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698464181896, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "如何启动一个 postgres Docker 镜像?", + "id": "LjG9XSuq", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698754460196, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754463118 + }, + { + "content": "要启动一个 PostgreSQL Docker 镜像,你可以按照以下步骤操作:\n\n1. 首先,确保你已经安装了 Docker,可以在终端中运行 `docker --version` 来检查。\n\n2. 在终端中运行以下命令以下载并启动一个 PostgreSQL 镜像:\n\n ```shell\n docker run --name my-postgres -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -d postgres\n ```\n\n 这将从 Docker Hub 下载 PostgreSQL 镜像", + "id": "CL7E1Mc5", + "meta": {}, + "parentId": "LjG9XSuq", + "role": "assistant", + "topicId": "MNWhBEh5", + "createdAt": 1698754479751, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754507690, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "如何指定本地存储的目录到 /www/database/postgres", + "id": "UIvHZr7m", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698754591763, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754591764 + }, + { + "content": "要将本地存储的目录映射到容器内的 `/var/lib/postgresql/data` 目录,你可以使用 Docker 的 `-v` 参数。\n\n以下是示例命令:\n\n```shell\ndocker run --name my-postgres -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -v /www/database/postgres:/var/lib/postgresql/data -d postgres\n```\n\n在这个命令中,我们使用了 `-v` 参数来指定本地目录 `/www/database/postgres` 映射到容器内的 `/var/lib/postgresql/data` 目录。这样,所有的 PostgreSQL 数据将会保存在本地指定的目录中。", + "id": "XLJOK44r", + "meta": {}, + "parentId": "UIvHZr7m", + "role": "assistant", + "topicId": "MNWhBEh5", + "createdAt": 1698754591765, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754609275, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "docker run --name postgres -e POSTGRES_PASSWORD=DSLapHYdzNrai5j6 -p 33300:5432 -v /www/database/postgres:/var/lib/postgresql/data -d postgres", + "id": "W3J6jQjm", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698754700529, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754700530 + }, + { + "content": "如何启动 pgadmin", + "id": "L7mq0ehV", + "meta": {}, + "role": "user", + "topicId": "MNWhBEh5", + "createdAt": 1698755749886, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698755749888 + }, + { + "content": "要启动 pgAdmin,你可以按照以下步骤操作:\n\n1. 首先,确保你已经安装了 Docker,并且已经成功启动了 PostgreSQL Docker 容器。\n\n2. 在终端中运行以下命令以启动 pgAdmin Docker 容器:\n\n ```shell\n docker run --name my-pgadmin -p 5050:80 -e PGADMIN_DEFAULT_EMAIL=myemail@example.com -e PGADMIN_DEFAULT_PASSWORD=mypassword -d dpage/pgadmin4\n ```\n\n 在这个命令中,我们使用了 `-p` 参数来将容器的端口 80 映射到本地的端口 5050。你可以根据需要修改这个端口映射。\n\n 我们还使用了 `-e` 参数来设置 pgAdmin 的默认登录邮箱和密码。你可以根据需要修改这些值。\n\n3. 在浏览器中访问 `http://localhost:5050`,你将看到 pgAdmin 的登录界面。\n\n4. 使用在上一步中设置的邮箱和密码进行登录。\n\n5. 
在 pgAdmin 中,你可以添加一个新的服务器连接,以连接到运行在 Docker 容器中的 PostgreSQL 数据库。要添加一个新的服务器连接,你需要提供以下信息:\n\n - 主机名/地址:使用 Docker 容器的 IP 地址或者 `localhost`。\n - 端口:使用 Docker 容器中 PostgreSQL 的映射端口,例如 33300。\n - 用户名和密码:使用在启动 PostgreSQL 容器时设置的用户名和密码。\n\n 完成上述信息的填写后,点击保存并连接到 PostgreSQL 服务器。\n\n现在,你已经成功启动了 pgAdmin 并连接到了你的 PostgreSQL 数据库。你可以使用 pgAdmin 来管理和操作你的数据库。", + "id": "d5XTX9EQ", + "meta": {}, + "parentId": "L7mq0ehV", + "role": "assistant", + "topicId": "MNWhBEh5", + "createdAt": 1698755749889, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698755786183, + "fromModel": "gpt-3.5-turbo" + }, + { + "content": "", + "id": "tool_calls_KPPDiRyW", + "meta": {}, + "parentId": "42k72jMi", + "role": "assistant", + "createdAt": 1690650544842, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1690650572389, + "fromModel": "gpt-3.5-turbo-16k", + "tools": [ + { + "apiName": "websiteCrawler", + "arguments": "{\n \"url\": \"https://mp.weixin.qq.com/s?__biz=MjM5MTA1MjAxMQ==&mid=2651264337&idx=1&sn=d7d9126578c74c912e1f0d42cb5629da&chksm=bd48ccd58a3f45c3f2cbc7d7b732c820b9e7cd6b547c06acc8170b233710b5fb5ed62f5fd94d&mpshare=1&scene=1&srcid=07294Mpw7C6JCLgtagL1cBDR&sharer_sharetime=1690622712877&sharer_shareid=0358058a42fc25387d28337fc3d22c3c#rd\"\n}", + "id": "tool_call_KPPDiRyW", + "identifier": "websiteCrawler", + "type": "default" + } + ] + }, + { + "content": "abcabc", + "id": "KPPDiRyW", + "meta": {}, + "parentId": "tool_calls_KPPDiRyW", + "role": "tool", + "createdAt": 1690650544852, + "plugin": { + "apiName": "websiteCrawler", + "arguments": "{\n \"url\": \"https://mp.weixin.qq.com/s?__biz=MjM5MTA1MjAxMQ==&mid=2651264337&idx=1&sn=d7d9126578c74c912e1f0d42cb5629da&chksm=bd48ccd58a3f45c3f2cbc7d7b732c820b9e7cd6b547c06acc8170b233710b5fb5ed62f5fd94d&mpshare=1&scene=1&srcid=07294Mpw7C6JCLgtagL1cBDR&sharer_sharetime=1690622712877&sharer_shareid=0358058a42fc25387d28337fc3d22c3c#rd\"\n}", + "identifier": "websiteCrawler", + "type": "default" + }, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1690650572399, + "fromModel": "gpt-3.5-turbo-16k", + "tool_call_id": "tool_call_KPPDiRyW" + }, + { + "content": "", + "id": "tool_calls_9cRjevRQ", + "meta": {}, + "parentId": "3nDXtEKv", + "role": "assistant", + "createdAt": 1700065743395, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1700065751851, + "fromModel": "gpt-3.5-turbo-16k", + "tools": [ + { + "apiName": "getWebsiteContent", + "arguments": "{\n \"url\": \"https://nodejs.org/api/packages.html#dual-package-hazard\"\n}", + "id": "tool_call_9cRjevRQ", + "identifier": "website-crawler", + "type": "default" + } + ] + }, + { + "content": "bbbbb", + "id": "9cRjevRQ", + "meta": {}, + "parentId": "tool_calls_9cRjevRQ", + "role": "tool", + "plugin": { + "apiName": "getWebsiteContent", + "arguments": "{\n \"url\": \"https://nodejs.org/api/packages.html#dual-package-hazard\"\n}", + "identifier": "website-crawler", + "type": "default" + }, + "createdAt": 1700065743405, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1700065751861, + "fromModel": "gpt-3.5-turbo-16k", + "tool_call_id": "tool_call_9cRjevRQ" + } + ], + "topics": [ + { + "id": "CZg3IEdy", + "title": "Prisma中的cuid函数作用是什么", + "createdAt": 1698463911747, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698463915716 + }, + { + "id": "MNWhBEh5", + "title": "启动 PostgreSQL Docker 镜像方法", + "createdAt": 1698754463117, + "sessionId": "06cc3e20-e870-4099-a619-c07a849d742d", + "updatedAt": 1698754464867 + } + ], + 
"sessionGroups": [] + }, + "version": 5 +} diff --git a/src/migrations/FromV4ToV5/fixtures/function-input-v4.json b/src/migrations/FromV4ToV5/fixtures/function-input-v4.json new file mode 100644 index 000000000000..f83e41855a29 --- /dev/null +++ b/src/migrations/FromV4ToV5/fixtures/function-input-v4.json @@ -0,0 +1,96 @@ +{ + "exportType": "sessions", + "state": { + "messages": [ + { + "role": "function", + "content": "[{\"city\":\"杭州市\",\"adcode\":\"330100\",\"province\":\"浙江\",\"reporttime\":\"2024-05-11 10:32:32\",\"casts\":[{\"date\":\"2024-05-11\",\"week\":\"6\",\"dayweather\":\"中雨\",\"nightweather\":\"中雨\",\"daytemp\":\"25\",\"nighttemp\":\"18\",\"daywind\":\"南\",\"nightwind\":\"南\",\"daypower\":\"4\",\"nightpower\":\"4\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"18.0\"},{\"date\":\"2024-05-12\",\"week\":\"7\",\"dayweather\":\"多云\",\"nightweather\":\"晴\",\"daytemp\":\"25\",\"nighttemp\":\"16\",\"daywind\":\"北\",\"nightwind\":\"北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-13\",\"week\":\"1\",\"dayweather\":\"晴\",\"nightweather\":\"晴\",\"daytemp\":\"28\",\"nighttemp\":\"16\",\"daywind\":\"东北\",\"nightwind\":\"东北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-14\",\"week\":\"2\",\"dayweather\":\"多云\",\"nightweather\":\"阴\",\"daytemp\":\"28\",\"nighttemp\":\"15\",\"daywind\":\"东\",\"nightwind\":\"东\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"15.0\"}]}]", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "WwRdrdCz", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395810026, + "id": "UQ5VAGkk", + "updatedAt": 1715395822026, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "904f7ab0-4066-4f9b-84fa-b1677974f976", + "plugin": { + "apiName": "fetchCurrentWeather", + "arguments": "{\n \"city\": \"杭州\"\n}", + "identifier": "realtime-weather", + "type": "default" + } + }, + { + "role": "user", + "content": "杭州天气如何?\n\n", + "files": [], + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715394828164, + "id": "WwRdrdCz", + "updatedAt": 1715394828164 + }, + { + "role": "assistant", + "content": "杭州的天气情况如下:\n\n- 今天(5月11日)白天和晚上都有中雨,白天温度为25摄氏度,晚上温度为18摄氏度,南风,风力为4级。\n- 明天(5月12日)白天多云,晚上晴,白天温度为25摄氏度,晚上温度为16摄氏度,北风,风力为1-3级。\n- 后天(5月13日)白天和晚上都是晴天,白天温度为28摄氏度,晚上温度为16摄氏度,东北风,风力为1-3级。\n- 大后天(5月14日)白天多云,晚上阴天,白天温度为28摄氏度,晚上温度为15摄氏度,东风,风力为1-3级。\n\n请注意根据实际情况做好防雨和保暖措施。", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "UQ5VAGkk", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395822125, + "id": "hbqsYoMn", + "updatedAt": 1715395830606, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "cad6a5bd-6a7a-4ff9-b37a-3deaa23b094a" + } + ], + "sessions": [ + { + "config": { + "autoCreateTopicThreshold": 2, + "displayMode": "chat", + "enableAutoCreateTopic": true, + "historyCount": 1, + "model": "gpt-3.5-turbo", + "params": { + "frequency_penalty": 0, + "presence_penalty": 0, + "temperature": 0.6, + "top_p": 1 + }, + "plugins": ["realtime-weather", "steam"], + "provider": "openai", + "systemRole": "", + "tts": { + "showAllLocaleVoice": false, + "sttLocale": "auto", + "ttsService": "openai", + "voice": { "openai": "alloy" } + } + }, + "group": "default", + "meta": {}, + "pinned": 
false, + "type": "agent", + "createdAt": 1715394818440, + "id": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "updatedAt": 1715394822366, + "model": "gpt-3.5-turbo" + } + ], + "topics": [ + { + "title": "杭州天气查询", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "createdAt": 1715394835099, + "id": "yjkdZafZ", + "updatedAt": 1715394838061, + "favorite": false + } + ] + }, + "version": 4 +} diff --git a/src/migrations/FromV4ToV5/fixtures/function-output-v5.json b/src/migrations/FromV4ToV5/fixtures/function-output-v5.json new file mode 100644 index 000000000000..439747b37f6b --- /dev/null +++ b/src/migrations/FromV4ToV5/fixtures/function-output-v5.json @@ -0,0 +1,120 @@ +{ + "exportType": "sessions", + "state": { + "messages": [ + { + "role": "assistant", + "content": "", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "WwRdrdCz", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395810016, + "id": "tool_calls_UQ5VAGkk", + "updatedAt": 1715395822016, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "904f7ab0-4066-4f9b-84fa-b1677974f976", + "tools": [ + { + "id": "tool_call_UQ5VAGkk", + "apiName": "fetchCurrentWeather", + "arguments": "{\n \"city\": \"杭州\"\n}", + "identifier": "realtime-weather", + "type": "default" + } + ] + }, + { + "role": "tool", + "content": "[{\"city\":\"杭州市\",\"adcode\":\"330100\",\"province\":\"浙江\",\"reporttime\":\"2024-05-11 10:32:32\",\"casts\":[{\"date\":\"2024-05-11\",\"week\":\"6\",\"dayweather\":\"中雨\",\"nightweather\":\"中雨\",\"daytemp\":\"25\",\"nighttemp\":\"18\",\"daywind\":\"南\",\"nightwind\":\"南\",\"daypower\":\"4\",\"nightpower\":\"4\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"18.0\"},{\"date\":\"2024-05-12\",\"week\":\"7\",\"dayweather\":\"多云\",\"nightweather\":\"晴\",\"daytemp\":\"25\",\"nighttemp\":\"16\",\"daywind\":\"北\",\"nightwind\":\"北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"25.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-13\",\"week\":\"1\",\"dayweather\":\"晴\",\"nightweather\":\"晴\",\"daytemp\":\"28\",\"nighttemp\":\"16\",\"daywind\":\"东北\",\"nightwind\":\"东北\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"16.0\"},{\"date\":\"2024-05-14\",\"week\":\"2\",\"dayweather\":\"多云\",\"nightweather\":\"阴\",\"daytemp\":\"28\",\"nighttemp\":\"15\",\"daywind\":\"东\",\"nightwind\":\"东\",\"daypower\":\"1-3\",\"nightpower\":\"1-3\",\"daytemp_float\":\"28.0\",\"nighttemp_float\":\"15.0\"}]}]", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "tool_calls_UQ5VAGkk", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395810026, + "id": "UQ5VAGkk", + "updatedAt": 1715395822026, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "904f7ab0-4066-4f9b-84fa-b1677974f976", + "tool_call_id": "tool_call_UQ5VAGkk", + "plugin": { + "apiName": "fetchCurrentWeather", + "arguments": "{\n \"city\": \"杭州\"\n}", + "identifier": "realtime-weather", + "type": "default" + } + }, + { + "role": "user", + "content": "杭州天气如何?\n\n", + "files": [], + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715394828164, + "id": "WwRdrdCz", + "updatedAt": 1715394828164 + }, + { + "role": "assistant", + "content": "杭州的天气情况如下:\n\n- 今天(5月11日)白天和晚上都有中雨,白天温度为25摄氏度,晚上温度为18摄氏度,南风,风力为4级。\n- 明天(5月12日)白天多云,晚上晴,白天温度为25摄氏度,晚上温度为16摄氏度,北风,风力为1-3级。\n- 
后天(5月13日)白天和晚上都是晴天,白天温度为28摄氏度,晚上温度为16摄氏度,东北风,风力为1-3级。\n- 大后天(5月14日)白天多云,晚上阴天,白天温度为28摄氏度,晚上温度为15摄氏度,东风,风力为1-3级。\n\n请注意根据实际情况做好防雨和保暖措施。", + "fromModel": "gpt-3.5-turbo", + "fromProvider": "openai", + "parentId": "UQ5VAGkk", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "topicId": "yjkdZafZ", + "createdAt": 1715395822125, + "id": "hbqsYoMn", + "updatedAt": 1715395830606, + "traceId": "4a8381a2-5582-46ce-87d8-c0f010de5c68", + "observationId": "cad6a5bd-6a7a-4ff9-b37a-3deaa23b094a" + } + ], + "sessions": [ + { + "config": { + "autoCreateTopicThreshold": 2, + "displayMode": "chat", + "enableAutoCreateTopic": true, + "historyCount": 1, + "model": "gpt-3.5-turbo", + "params": { + "frequency_penalty": 0, + "presence_penalty": 0, + "temperature": 0.6, + "top_p": 1 + }, + "plugins": ["realtime-weather", "steam"], + "provider": "openai", + "systemRole": "", + "tts": { + "showAllLocaleVoice": false, + "sttLocale": "auto", + "ttsService": "openai", + "voice": { "openai": "alloy" } + } + }, + "group": "default", + "meta": {}, + "pinned": false, + "type": "agent", + "createdAt": 1715394818440, + "id": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "updatedAt": 1715394822366, + "model": "gpt-3.5-turbo" + } + ], + "topics": [ + { + "title": "杭州天气查询", + "sessionId": "8eb073bb-e133-4dbd-81a2-5305a8447b5f", + "createdAt": 1715394835099, + "id": "yjkdZafZ", + "updatedAt": 1715394838061, + "favorite": false + } + ] + }, + "version": 5 +} diff --git a/src/migrations/FromV4ToV5/index.ts b/src/migrations/FromV4ToV5/index.ts new file mode 100644 index 000000000000..366188536a2e --- /dev/null +++ b/src/migrations/FromV4ToV5/index.ts @@ -0,0 +1,58 @@ +import type { Migration, MigrationData } from '@/migrations/VersionController'; + +import { V4ConfigState, V4Message } from './types/v4'; +import { V5ConfigState, V5Message } from './types/v5'; + +export class MigrationV4ToV5 implements Migration { + // from this version to start migration + version = 4; + + migrate(data: MigrationData): MigrationData { + const { messages } = data.state; + + return { + ...data, + state: { + ...data.state, + messages: MigrationV4ToV5.migrateMessage(messages), + }, + }; + } + + static migrateMessage(messages: V4Message[]): V5Message[] { + let v5Messages: V5Message[] = []; + + messages.forEach((item) => { + if (item.role === 'function') { + const toolCallId = `tool_call_${item.id}`; + const assistantMessageId = `tool_calls_${item.id}`; + const assistantMessage: V5Message = { + ...item, + content: '', + // make sure the createdAt is before than tool message + createdAt: item.createdAt - 10, + id: assistantMessageId, + plugin: undefined, + role: 'assistant', + tools: [{ ...item.plugin!, id: toolCallId }], + updatedAt: item.updatedAt - 10, + }; + + const toolMessage: V5Message = { + ...item, + parentId: assistantMessageId, + role: 'tool', + tool_call_id: toolCallId, + }; + v5Messages.push(assistantMessage, toolMessage); + } + + // if not function message, just push it + else { + v5Messages.push(item as V5Message); + } + }); + + return v5Messages; + } +} diff --git a/src/migrations/FromV4ToV5/migrations.test.ts b/src/migrations/FromV4ToV5/migrations.test.ts new file mode 100644 index 000000000000..9ce33d5fc50e --- /dev/null +++ b/src/migrations/FromV4ToV5/migrations.test.ts @@ -0,0 +1,49 @@ +import { describe } from 'vitest'; + +import { MigrationData, VersionController } from '@/migrations/VersionController'; + +import { MigrationV1ToV2 } from '../FromV1ToV2'; +import inputV1Data from '../FromV1ToV2/fixtures/input-v1-session.json'; 
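Note on `MigrationV4ToV5.migrateMessage` above: every v4 `role=function` message is split into a synthetic assistant message that owns the tool call (its id prefixed with `tool_calls_`) plus a `role=tool` result message pointing back via `parentId` and `tool_call_id`. A minimal sketch of the transformation on a hypothetical v4 record (ids and timestamp offsets follow the conventions of the implementation above):

```ts
import { MigrationV4ToV5 } from '@/migrations/FromV4ToV5';

// hypothetical v4-era record: a plugin result stored as a single `function` message
const v4Messages = [
  {
    content: '{"weather":"rain"}',
    createdAt: 1715395810026,
    id: 'UQ5VAGkk',
    plugin: {
      apiName: 'fetchCurrentWeather',
      arguments: '{"city":"杭州"}',
      identifier: 'realtime-weather',
      type: 'default' as const,
    },
    role: 'function' as const,
    updatedAt: 1715395822026,
  },
];

const v5Messages = MigrationV4ToV5.migrateMessage(v4Messages);
// => [
//   { id: 'tool_calls_UQ5VAGkk', role: 'assistant', content: '',
//     tools: [{ apiName: 'fetchCurrentWeather', arguments: '{"city":"杭州"}',
//               identifier: 'realtime-weather', type: 'default', id: 'tool_call_UQ5VAGkk' }],
//     createdAt: 1715395810016, updatedAt: 1715395822016 },
//   { id: 'UQ5VAGkk', role: 'tool', parentId: 'tool_calls_UQ5VAGkk',
//     tool_call_id: 'tool_call_UQ5VAGkk', content: '{"weather":"rain"}',
//     createdAt: 1715395810026, updatedAt: 1715395822026 },
// ]
```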
+import { MigrationV2ToV3 } from '../FromV2ToV3'; +import { MigrationV3ToV4 } from '../FromV3ToV4'; +import outputDataFromV1ToV5 from './fixtures/from-v1-to-v5-output.json'; +import functionInputV4 from './fixtures/function-input-v4.json'; +import functionOutputV5 from './fixtures/function-output-v5.json'; +import { MigrationV4ToV5 } from './index'; + +describe('MigrationV4ToV5', () => { + let migrations; + let versionController: VersionController; + + beforeEach(() => { + migrations = [MigrationV4ToV5]; + versionController = new VersionController(migrations, 5); + }); + + describe('should migrate data correctly from previous versions', () => { + it('role=function', () => { + const data: MigrationData = functionInputV4; + + const migratedData = versionController.migrate(data); + + expect(migratedData.version).toEqual(functionOutputV5.version); + expect(migratedData.state.messages).toEqual(functionOutputV5.state.messages); + }); + }); + + it('should work correct from v1 to v5', () => { + const data: MigrationData = inputV1Data; + + versionController = new VersionController( + [MigrationV4ToV5, MigrationV3ToV4, MigrationV2ToV3, MigrationV1ToV2], + 5, + ); + + const migratedData = versionController.migrate(data); + + expect(migratedData.version).toEqual(outputDataFromV1ToV5.version); + expect(migratedData.state.messages).toEqual(outputDataFromV1ToV5.state.messages); + expect(migratedData.state.sessions).toEqual(outputDataFromV1ToV5.state.sessions); + expect(migratedData.state.topics).toEqual(outputDataFromV1ToV5.state.topics); + }); +}); diff --git a/src/migrations/FromV4ToV5/types/v4.ts b/src/migrations/FromV4ToV5/types/v4.ts new file mode 100644 index 000000000000..ee51440a916c --- /dev/null +++ b/src/migrations/FromV4ToV5/types/v4.ts @@ -0,0 +1,21 @@ +import { LobeToolRenderType } from '@/types/tool'; + +export interface V4ChatPluginPayload { + apiName: string; + arguments: string; + identifier: string; + type: LobeToolRenderType; +} + +export interface V4Message { + content: string; + createdAt: number; + id: string; + plugin?: V4ChatPluginPayload; + role: 'user' | 'system' | 'assistant' | 'function'; + updatedAt: number; +} + +export interface V4ConfigState { + messages: V4Message[]; +} diff --git a/src/migrations/FromV4ToV5/types/v5.ts b/src/migrations/FromV4ToV5/types/v5.ts new file mode 100644 index 000000000000..4241b3b2e886 --- /dev/null +++ b/src/migrations/FromV4ToV5/types/v5.ts @@ -0,0 +1,27 @@ +import { LobeToolRenderType } from '@/types/tool'; + +import { V4ChatPluginPayload } from './v4'; + +interface ChatToolPayload { + apiName: string; + arguments: string; + id: string; + identifier: string; + type: LobeToolRenderType; +} + +export interface V5Message { + content: string; + createdAt: number; + id: string; + parentId?: string; + plugin?: V4ChatPluginPayload; + role: 'user' | 'system' | 'assistant' | 'tool'; + tool_call_id?: string; + tools?: ChatToolPayload[]; + updatedAt: number; +} + +export interface V5ConfigState { + messages: V5Message[]; +} diff --git a/src/migrations/index.ts b/src/migrations/index.ts index a7c9304bbe8b..b91c22897e29 100644 --- a/src/migrations/index.ts +++ b/src/migrations/index.ts @@ -5,12 +5,19 @@ import { ConfigStateAll } from '@/types/exportConfig'; import { MigrationV0ToV1 } from './FromV0ToV1'; import { MigrationV1ToV2 } from './FromV1ToV2'; import { MigrationV3ToV4 } from './FromV3ToV4'; +import { MigrationV4ToV5 } from './FromV4ToV5'; // Current latest version -export const CURRENT_CONFIG_VERSION = 4; +export const CURRENT_CONFIG_VERSION = 5; 
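Note on versioning: `CURRENT_CONFIG_VERSION` stamps every export, and `VersionController` (exercised in `migrations.test.ts` above) upgrades older data step by step, so a v1 export passes through V1→V2→V3→V4→V5 in a single `migrate` call. A usage sketch mirroring that test setup (`oldExport` is a placeholder for previously exported data):

```ts
import type { MigrationData } from '@/migrations/VersionController';
import { VersionController } from '@/migrations/VersionController';
import { MigrationV1ToV2 } from '@/migrations/FromV1ToV2';
import { MigrationV2ToV3 } from '@/migrations/FromV2ToV3';
import { MigrationV3ToV4 } from '@/migrations/FromV3ToV4';
import { MigrationV4ToV5 } from '@/migrations/FromV4ToV5';

declare const oldExport: MigrationData; // e.g. a version-1 sessions export

// the test above registers migrations newest-first, so ordering is evidently
// resolved by the controller from each migration's `version` field
const controller = new VersionController(
  [MigrationV4ToV5, MigrationV3ToV4, MigrationV2ToV3, MigrationV1ToV2],
  5, // target version, kept in sync with CURRENT_CONFIG_VERSION
);

const migrated = controller.migrate(oldExport); // => { ..., version: 5 }
```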
// Version migrations module const ConfigMigrations = [ + /** + * 2024.05.11 + * + * role=function to role=tool + */ + MigrationV4ToV5, /** * 2024.04.09 * settings migrate the `languageModel` diff --git a/src/services/chat.ts b/src/services/chat.ts index 9db2af302bdf..316a39dd1b8a 100644 --- a/src/services/chat.ts +++ b/src/services/chat.ts @@ -427,18 +427,12 @@ class ChatService { name: genToolCallingName(tool.identifier, tool.apiName, tool.type), }, id: tool.id, - type: tool.type, + type: 'function', }), ), }; } - // TODO: need to be removed after upgrade - case 'function': { - const name = m.plugin?.identifier as string; - return { content: m.content, name, role: m.role }; - } - case 'tool': { return { content: m.content, diff --git a/src/store/chat/slices/message/action.ts b/src/store/chat/slices/message/action.ts index 1bae1ab27fef..54c5719557f5 100644 --- a/src/store/chat/slices/message/action.ts +++ b/src/store/chat/slices/message/action.ts @@ -564,7 +564,7 @@ export const chatMessage: StateCreator< let contextMessages: ChatMessage[] = []; switch (currentMessage.role) { - case 'function': + case 'tool': case 'user': { contextMessages = chats.slice(0, currentIndex + 1); break; diff --git a/src/store/chat/slices/share/action.test.ts b/src/store/chat/slices/share/action.test.ts index 6e1caa6bccf0..71b6617b01de 100644 --- a/src/store/chat/slices/share/action.test.ts +++ b/src/store/chat/slices/share/action.test.ts @@ -85,7 +85,7 @@ describe('shareSlice actions', () => { it('should include plugin information when withPluginInfo is true', async () => { // 模拟带有插件信息的消息 const pluginMessage = { - role: 'function', + role: 'tool', content: 'plugin content', plugin: { type: 'default', @@ -118,7 +118,7 @@ describe('shareSlice actions', () => { it('should not include plugin information when withPluginInfo is false', async () => { const pluginMessage = { - role: 'function', + role: 'tool', content: 'plugin content', plugin: { type: 'default', diff --git a/src/store/chat/slices/share/action.ts b/src/store/chat/slices/share/action.ts index 5922bbaf20a6..2bb03d928aac 100644 --- a/src/store/chat/slices/share/action.ts +++ b/src/store/chat/slices/share/action.ts @@ -81,7 +81,7 @@ export const chatShare: StateCreator; files?: string[]; - /** - * only used in tool calling - */ - name?: string; /** * observation id */ @@ -66,7 +63,7 @@ export interface ChatMessage extends BaseDataModel { /** * message role type */ - role: LLMRoleType; + role: MessageRoleType; sessionId?: string; tool_call_id?: string; From 59402d00e13634284e73364cc215adcf48df009e Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 05:55:16 +0000 Subject: [PATCH 17/24] =?UTF-8?q?=F0=9F=97=83=EF=B8=8F=20refactor:=20migra?= =?UTF-8?q?te=20database?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/database/client/core/db.ts | 32 +++++++++++++++++++++++++++++ src/database/client/core/schemas.ts | 9 ++++++++ 2 files changed, 41 insertions(+) diff --git a/src/database/client/core/db.ts b/src/database/client/core/db.ts index 9c16d69f00dc..7de75d88eb08 100644 --- a/src/database/client/core/db.ts +++ b/src/database/client/core/db.ts @@ -19,6 +19,7 @@ import { dbSchemaV5, dbSchemaV6, dbSchemaV7, + dbSchemaV9, } from './schemas'; import { DBModel, LOBE_CHAT_LOCAL_DB_NAME } from './types/db'; @@ -67,6 +68,10 @@ export class BrowserDB extends Dexie { .stores(dbSchemaV7) .upgrade((trans) => this.upgradeToV8(trans)); + this.version(9) + .stores(dbSchemaV9) + .upgrade((trans) => 
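// (Context for the chained call that continues below: Dexie runs this `upgrade`
// callback once, inside the version-change transaction, when an existing client
// database below version 9 is opened. `upgradeToV9` is the IndexedDB counterpart
// of the JSON-export migration in `MigrationV4ToV5` above: each legacy
// `role=function` row is rewritten as a `role=tool` row plus a synthetic
// assistant row.)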
this.upgradeToV9(trans)); + + this.files = this.table('files'); this.sessions = this.table('sessions'); this.messages = this.table('messages'); @@ -153,6 +158,35 @@ export class BrowserDB extends Dexie { } }); }; + + upgradeToV9 = async (trans: Transaction) => { + const messages = trans.table('messages'); + await messages.toCollection().modify(async (message: DBModel<DB_Message>) => { + if ((message.role as string) === 'function') { + const origin = Object.assign({}, message); + + const toolCallId = `tool_call_${message.id}`; + const assistantMessageId = `tool_calls_${message.id}`; + + // rewrite the legacy function message in place as the tool result + message.role = 'tool'; + message.tool_call_id = toolCallId; + message.parentId = assistantMessageId; + + // add a synthetic assistant message that carries the tool call + await messages.add({ + ...origin, + content: '', + createdAt: message.createdAt - 10, + error: undefined, + id: assistantMessageId, + role: 'assistant', + tools: [{ ...message.plugin!, id: toolCallId }], + updatedAt: message.updatedAt - 10, + } as DBModel<DB_Message>); + } + }); + }; } export const browserDB = new BrowserDB(); diff --git a/src/database/client/core/schemas.ts b/src/database/client/core/schemas.ts index f07a699c96c4..ae066de791d7 100644 --- a/src/database/client/core/schemas.ts +++ b/src/database/client/core/schemas.ts @@ -76,3 +76,12 @@ export const dbSchemaV7 = { plugins: '&identifier, id, type, manifest.type, manifest.meta.title, manifest.meta.description, manifest.meta.author, createdAt, updatedAt', }; +// ************************************** // +// ******* Version 9 - 2024-05-11 ******* // +// ************************************** // +// - Added `tool_call_id` index to the `messages` table (migrate `role=function` to `role=tool`) +export const dbSchemaV9 = { + ...dbSchemaV7, + messages: + '&id, role, content, fromModel, favorite, tool_call_id, plugin.identifier, plugin.apiName, translate.content, createdAt, updatedAt, sessionId, topicId, quotaId, parentId, [sessionId+topicId], traceId', +}; From 41e9ec408ea91a5e0734ea69d40a42ef32e253b6 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 06:41:24 +0000 Subject: [PATCH 18/24] =?UTF-8?q?=E2=9C=85=20test:=20fix=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../client/models/__tests__/message.test.ts | 2 +- src/services/__tests__/chat.test.ts | 28 ++++++------------- src/store/chat/slices/share/action.test.ts | 2 +- src/utils/toolCall.ts | 2 +- 4 files changed, 12 insertions(+), 22 deletions(-) diff --git a/src/database/client/models/__tests__/message.test.ts b/src/database/client/models/__tests__/message.test.ts index 26687e7c6b0a..23f7819a6b15 100644 --- a/src/database/client/models/__tests__/message.test.ts +++ b/src/database/client/models/__tests__/message.test.ts @@ -271,7 +271,7 @@ describe('MessageModel', () => { await MessageModel.update(createdMessage.id, updateData); const updatedMessage = await MessageModel.findById(createdMessage.id); - expect(updatedMessage).toHaveProperty('role', 'function'); + expect(updatedMessage).toHaveProperty('role', 'tool'); }); }); diff --git a/src/services/__tests__/chat.test.ts b/src/services/__tests__/chat.test.ts index b9f403533e2b..376a89ab9859 100644 --- a/src/services/__tests__/chat.test.ts +++ b/src/services/__tests__/chat.test.ts @@ -126,7 +126,7 @@ describe('ChatService', () => { it('should include image content when with vision model', async () => { const messages = [ { content: 'Hello', role: 'user', files: ['file1'] }, // Message with files - { content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role + { content: 'Hi', role: 'tool', plugin: { identifier: 'plugin1', 
apiName: 'api1' } }, // Message with tool role { content: 'Hey', role: 'assistant' }, // Regular user message ] as ChatMessage[]; @@ -166,8 +166,8 @@ describe('ChatService', () => { }, { content: 'Hi', - name: 'plugin1', - role: 'function', + name: 'plugin1____api1', + role: 'tool', }, { content: 'Hey', @@ -183,7 +183,7 @@ describe('ChatService', () => { it('should not include image content when default model', async () => { const messages = [ { content: 'Hello', role: 'user', files: ['file1'] }, // Message with files - { content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role + { content: 'Hi', role: 'tool', plugin: { identifier: 'plugin1', apiName: 'api1' } }, // Message with function role { content: 'Hey', role: 'assistant' }, // Regular user message ] as ChatMessage[]; @@ -212,7 +212,7 @@ describe('ChatService', () => { { messages: [ { content: 'Hello', role: 'user' }, - { content: 'Hi', name: 'plugin1', role: 'function' }, + { content: 'Hi', name: 'plugin1____api1', role: 'tool' }, { content: 'Hey', role: 'assistant' }, ], model: 'gpt-3.5-turbo', @@ -224,7 +224,7 @@ describe('ChatService', () => { it('should not include image with vision models when can not find the image', async () => { const messages = [ { content: 'Hello', role: 'user', files: ['file2'] }, // Message with files - { content: 'Hi', role: 'function', plugin: { identifier: 'plugin1' } }, // Message with function role + { content: 'Hi', role: 'tool', plugin: { identifier: 'plugin1', apiName: 'api1' } }, // Message with function role { content: 'Hey', role: 'assistant' }, // Regular user message ] as ChatMessage[]; @@ -248,19 +248,9 @@ describe('ChatService', () => { expect(getChatCompletionSpy).toHaveBeenCalledWith( { messages: [ - { - content: 'Hello', - role: 'user', - }, - { - content: 'Hi', - name: 'plugin1', - role: 'function', - }, - { - content: 'Hey', - role: 'assistant', - }, + { content: 'Hello', role: 'user' }, + { content: 'Hi', name: 'plugin1____api1', role: 'tool' }, + { content: 'Hey', role: 'assistant' }, ], }, undefined, diff --git a/src/store/chat/slices/share/action.test.ts b/src/store/chat/slices/share/action.test.ts index 71b6617b01de..f40ae56181e9 100644 --- a/src/store/chat/slices/share/action.test.ts +++ b/src/store/chat/slices/share/action.test.ts @@ -154,7 +154,7 @@ describe('shareSlice actions', () => { { role: 'user', content: 'user message', id: '1' }, { role: 'assistant', content: 'assistant message', id: '2' }, { - role: 'function', + role: 'tool', content: 'plugin content', plugin: { type: 'default', diff --git a/src/utils/toolCall.ts b/src/utils/toolCall.ts index 59046f697992..0f7737404b34 100644 --- a/src/utils/toolCall.ts +++ b/src/utils/toolCall.ts @@ -2,7 +2,7 @@ import { Md5 } from 'ts-md5'; import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; -export const genToolCallingName = (identifier: string, name: string, type?: string) => { +export const genToolCallingName = (identifier: string, name: string, type: string = 'default') => { const pluginType = type && type !== 'default' ? 
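// (Naming scheme: the generated tool-calling name joins the plugin identifier and
// apiName with PLUGIN_SCHEMA_SEPARATOR, which the updated chat.test.ts expectations
// above show as e.g. genToolCallingName('plugin1', 'api1') === 'plugin1____api1';
// the expression continuing below appends a type suffix only for non-default tool
// types, and the new `type = 'default'` parameter default keeps that check simple.)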
`${PLUGIN_SCHEMA_SEPARATOR + type}` : ''; // 将插件的 identifier 作为前缀,避免重复 From e7ae1006de2761a663291d9b178a958bdc7d0931 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 07:36:18 +0000 Subject: [PATCH 19/24] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20refactor?= =?UTF-8?q?=20createCallbacksTransformer=20to=20fix=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/types/chat.ts | 18 +++++-- .../agent-runtime/utils/streams/anthropic.ts | 3 +- .../utils/streams/bedrock/claude.ts | 8 +-- .../utils/streams/bedrock/llama.ts | 13 +++-- .../agent-runtime/utils/streams/google-ai.ts | 3 +- .../agent-runtime/utils/streams/minimax.ts | 3 +- .../agent-runtime/utils/streams/ollama.ts | 18 ++++--- .../agent-runtime/utils/streams/openai.ts | 3 +- .../agent-runtime/utils/streams/protocol.ts | 51 +++++++++++++++++++ 9 files changed, 96 insertions(+), 24 deletions(-) diff --git a/src/libs/agent-runtime/types/chat.ts b/src/libs/agent-runtime/types/chat.ts index 93aebf994997..dabe06a62461 100644 --- a/src/libs/agent-runtime/types/chat.ts +++ b/src/libs/agent-runtime/types/chat.ts @@ -1,5 +1,3 @@ -import { OpenAIStreamCallbacks } from 'ai'; - import { MessageToolCall } from '@/types/message'; export type LLMRoleType = 'user' | 'system' | 'assistant' | 'function' | 'tool'; @@ -131,4 +129,18 @@ export interface ChatCompletionTool { type: 'function'; } -export type ChatStreamCallbacks = OpenAIStreamCallbacks; +export interface ChatStreamCallbacks { + /** + * `onCompletion`: Called when a completion finishes, with the full completion text. + */ + onCompletion?: (completion: string) => Promise<void> | void; + /** `onFinal`: Called once when the stream is closed with the final completion message. */ + onFinal?: (completion: string) => Promise<void> | void; + /** `onStart`: Called once when the stream is initialized. */ + onStart?: () => Promise<void> | void; + /** `onText`: Called for each text chunk. */ + onText?: (text: string) => Promise<void> | void; + /** `onToken`: Called for each tokenized message. 
*/ + onToken?: (token: string) => Promise<void> | void; + onToolCall?: () => Promise<void> | void; +} diff --git a/src/libs/agent-runtime/utils/streams/anthropic.ts b/src/libs/agent-runtime/utils/streams/anthropic.ts index d60f3362470e..e17c0ffa42e3 100644 --- a/src/libs/agent-runtime/utils/streams/anthropic.ts +++ b/src/libs/agent-runtime/utils/streams/anthropic.ts @@ -1,6 +1,6 @@ import Anthropic from '@anthropic-ai/sdk'; import type { Stream } from '@anthropic-ai/sdk/streaming'; -import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import { ChatStreamCallbacks } from '../../types'; import { @@ -8,6 +8,7 @@ import { StreamProtocolToolCallChunk, StreamStack, StreamToolCallChunkData, + createCallbacksTransformer, createSSEProtocolTransformer, } from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/claude.ts b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts index bfd2945b8329..ac74a985c813 100644 --- a/src/libs/agent-runtime/utils/streams/bedrock/claude.ts +++ b/src/libs/agent-runtime/utils/streams/bedrock/claude.ts @@ -1,15 +1,15 @@ import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; -import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; import { nanoid } from '@/utils/uuid'; +import { ChatStreamCallbacks } from '../../../types'; import { transformAnthropicStream } from '../anthropic'; -import { StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { StreamStack, createCallbacksTransformer, createSSEProtocolTransformer } from '../protocol'; import { createBedrockStream } from './common'; export const AWSBedrockClaudeStream = ( res: InvokeModelWithResponseStreamResponse | ReadableStream, - cb?: AIStreamCallbacksAndOptions, + cb?: ChatStreamCallbacks, ): ReadableStream => { const streamStack: StreamStack = { id: 'chat_' + nanoid() }; @@ -17,5 +17,5 @@ export const AWSBedrockClaudeStream = ( return stream .pipeThrough(createSSEProtocolTransformer(transformAnthropicStream, streamStack)) - .pipeThrough(createCallbacksTransformer(cb) as any); + .pipeThrough(createCallbacksTransformer(cb)); }; diff --git a/src/libs/agent-runtime/utils/streams/bedrock/llama.ts b/src/libs/agent-runtime/utils/streams/bedrock/llama.ts index adec93f736a5..78e0f5b327d5 100644 --- a/src/libs/agent-runtime/utils/streams/bedrock/llama.ts +++ b/src/libs/agent-runtime/utils/streams/bedrock/llama.ts @@ -1,9 +1,14 @@ import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; -import { type AIStreamCallbacksAndOptions, createCallbacksTransformer } from 'ai'; import { nanoid } from '@/utils/uuid'; -import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from '../protocol'; +import { ChatStreamCallbacks } from '../../../types'; +import { + StreamProtocolChunk, + StreamStack, + createCallbacksTransformer, + createSSEProtocolTransformer, +} from '../protocol'; import { createBedrockStream } from './common'; interface AmazonBedrockInvocationMetrics { @@ -34,7 +39,7 @@ export const transformLlamaStream = ( export const AWSBedrockLlamaStream = ( res: InvokeModelWithResponseStreamResponse | ReadableStream, - cb?: AIStreamCallbacksAndOptions, + cb?: ChatStreamCallbacks, ): ReadableStream => { const streamStack: StreamStack = { id: 'chat_' + nanoid() }; @@ -42,5 +47,5 @@ export const AWSBedrockLlamaStream = ( return stream .pipeThrough(createSSEProtocolTransformer(transformLlamaStream, streamStack)) - 
.pipeThrough(createCallbacksTransformer(cb) as any); + .pipeThrough(createCallbacksTransformer(cb)); }; diff --git a/src/libs/agent-runtime/utils/streams/google-ai.ts b/src/libs/agent-runtime/utils/streams/google-ai.ts index 92a6bfd10c8d..ff457c52be73 100644 --- a/src/libs/agent-runtime/utils/streams/google-ai.ts +++ b/src/libs/agent-runtime/utils/streams/google-ai.ts @@ -2,7 +2,7 @@ import { EnhancedGenerateContentResponse, GenerateContentStreamResult, } from '@google/generative-ai'; -import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import { nanoid } from '@/utils/uuid'; @@ -12,6 +12,7 @@ import { StreamStack, StreamToolCallChunkData, chatStreamable, + createCallbacksTransformer, createSSEProtocolTransformer, generateToolCallId, } from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/minimax.ts b/src/libs/agent-runtime/utils/streams/minimax.ts index a517b9f794e4..05629bafdc01 100644 --- a/src/libs/agent-runtime/utils/streams/minimax.ts +++ b/src/libs/agent-runtime/utils/streams/minimax.ts @@ -1,9 +1,8 @@ -import { createCallbacksTransformer } from 'ai'; import OpenAI from 'openai'; import { ChatStreamCallbacks } from '../../types'; import { transformOpenAIStream } from './openai'; -import { createSSEProtocolTransformer } from './protocol'; +import { createCallbacksTransformer, createSSEProtocolTransformer } from './protocol'; const unit8ArrayToJSONChunk = (unit8Array: Uint8Array): OpenAI.ChatCompletionChunk => { const decoder = new TextDecoder(); diff --git a/src/libs/agent-runtime/utils/streams/ollama.ts b/src/libs/agent-runtime/utils/streams/ollama.ts index 728b78dedff2..32d4c5197d8b 100644 --- a/src/libs/agent-runtime/utils/streams/ollama.ts +++ b/src/libs/agent-runtime/utils/streams/ollama.ts @@ -1,13 +1,15 @@ -import { - type AIStreamCallbacksAndOptions, - createCallbacksTransformer, - readableFromAsyncIterable, -} from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import { ChatResponse } from 'ollama/browser'; +import { ChatStreamCallbacks } from '@/libs/agent-runtime'; import { nanoid } from '@/utils/uuid'; -import { StreamProtocolChunk, StreamStack, createSSEProtocolTransformer } from './protocol'; +import { + StreamProtocolChunk, + StreamStack, + createCallbacksTransformer, + createSSEProtocolTransformer, +} from './protocol'; const transformOllamaStream = (chunk: ChatResponse, stack: StreamStack): StreamProtocolChunk => { // maybe need another structure to add support for multiple choices @@ -26,11 +28,11 @@ const chatStreamable = async function* (stream: AsyncIterable) { export const OllamaStream = ( res: AsyncIterable, - cb?: AIStreamCallbacksAndOptions, + cb?: ChatStreamCallbacks, ): ReadableStream => { const streamStack: StreamStack = { id: 'chat_' + nanoid() }; return readableFromAsyncIterable(chatStreamable(res)) .pipeThrough(createSSEProtocolTransformer(transformOllamaStream, streamStack)) - .pipeThrough(createCallbacksTransformer(cb) as any); + .pipeThrough(createCallbacksTransformer(cb)); }; diff --git a/src/libs/agent-runtime/utils/streams/openai.ts b/src/libs/agent-runtime/utils/streams/openai.ts index 908c6575a6fd..3867bc01198c 100644 --- a/src/libs/agent-runtime/utils/streams/openai.ts +++ b/src/libs/agent-runtime/utils/streams/openai.ts @@ -1,4 +1,4 @@ -import { createCallbacksTransformer, readableFromAsyncIterable } from 'ai'; +import { readableFromAsyncIterable } from 'ai'; import OpenAI from 'openai'; import type { Stream } from 'openai/streaming'; @@ -7,6 +7,7 @@ 
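Note on the pattern above: after this refactor every provider stream is built the same way. A provider-specific transformer maps native chunks onto the SSE protocol frames (`event: ...` / `data: ...`), and the locally defined `createCallbacksTransformer` (see the `protocol.ts` hunk that follows) is piped on last to watch those frames and fire the `ChatStreamCallbacks`. A rough consumer-side sketch (the `OllamaStream` import path is assumed from the streams barrel added earlier in this patch series):

```ts
import { ChatStreamCallbacks } from '@/libs/agent-runtime';
// assumed barrel export; adjust the path if OllamaStream is not re-exported there
import { OllamaStream } from '@/libs/agent-runtime/utils/streams';

const callbacks: ChatStreamCallbacks = {
  onStart: () => console.log('stream opened'),
  // `onText` receives the raw `data:` payload of a text frame (a JSON-encoded string)...
  onText: (text) => console.log('text frame:', text),
  // ...while `onToken` receives the decoded token itself
  onToken: (token) => process.stdout.write(token),
  onToolCall: () => console.log('tool_calls frame seen'),
  onFinal: (completion) => console.log('aggregated:', completion),
};

// `res` is the async-iterable chat response from the ollama browser client
export const toSSEResponse = (res: AsyncIterable<any>) =>
  new Response(OllamaStream(res, callbacks), {
    headers: { 'content-type': 'text/event-stream' },
  });
```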
import { StreamProtocolChunk, StreamProtocolToolCallChunk, StreamToolCallChunkData, + createCallbacksTransformer, createSSEProtocolTransformer, generateToolCallId, } from './protocol'; diff --git a/src/libs/agent-runtime/utils/streams/protocol.ts b/src/libs/agent-runtime/utils/streams/protocol.ts index 408220fbb77d..f78e78ce920a 100644 --- a/src/libs/agent-runtime/utils/streams/protocol.ts +++ b/src/libs/agent-runtime/utils/streams/protocol.ts @@ -1,3 +1,5 @@ +import { ChatStreamCallbacks } from '@/libs/agent-runtime'; + export interface StreamStack { id: string; } @@ -47,3 +49,52 @@ export const createSSEProtocolTransformer = ( controller.enqueue(`data: ${JSON.stringify(data)}\n\n`); }, }); + +export function createCallbacksTransformer(cb: ChatStreamCallbacks | undefined) { + const textEncoder = new TextEncoder(); + let aggregatedResponse = ''; + let currentType = ''; + const callbacks = cb || {}; + + return new TransformStream({ + async flush(): Promise<void> { + if (callbacks.onCompletion) { + await callbacks.onCompletion(aggregatedResponse); + } + + if (callbacks.onFinal) { + await callbacks.onFinal(aggregatedResponse); + } + }, + + async start(): Promise<void> { + if (callbacks.onStart) await callbacks.onStart(); + }, + + async transform(chunk: string, controller): Promise<void> { + controller.enqueue(textEncoder.encode(chunk)); + + // track the type of the chunk + if (chunk.startsWith('event:')) { + currentType = chunk.split('event:')[1].trim(); + } + // if the message is a data chunk, handle the callback + else if (chunk.startsWith('data:')) { + const content = chunk.split('data:')[1].trim(); + + switch (currentType) { + case 'text': { + await callbacks.onText?.(content); + await callbacks.onToken?.(JSON.parse(content)); + break; + } + + case 'tool_calls': { + // TODO: make on ToolCall callback + await callbacks.onToolCall?.(); + } + } + } + }, + }); +} From d4965b293f1a6a4d6d0b448473d01d91632fa45d Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Sat, 11 May 2024 10:13:04 +0000 Subject: [PATCH 20/24] =?UTF-8?q?=20=E2=9C=85=20test:=20add=20tests=20for?= =?UTF-8?q?=20runtime?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../agent-runtime/anthropic/index.test.ts | 195 +++++++++++++++++ src/libs/agent-runtime/google/index.test.ts | 154 ++++++++++++++ src/libs/agent-runtime/google/index.ts | 8 +- src/libs/agent-runtime/groq/index.test.ts | 39 ++++ src/libs/agent-runtime/ollama/index.test.ts | 197 ++++++++++++++++++ .../openaiCompatibleFactory/index.test.ts | 155 +++++++++++++- 6 files changed, 733 insertions(+), 15 deletions(-) create mode 100644 src/libs/agent-runtime/ollama/index.test.ts diff --git a/src/libs/agent-runtime/anthropic/index.test.ts b/src/libs/agent-runtime/anthropic/index.test.ts index face8a0f3d6a..2c42c29cd859 100644 --- a/src/libs/agent-runtime/anthropic/index.test.ts +++ b/src/libs/agent-runtime/anthropic/index.test.ts @@ -1,6 +1,9 @@ // @vitest-environment node import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { ChatCompletionTool } from '@/libs/agent-runtime'; + +import * as anthropicHelpers from '../utils/anthropicHelpers'; import * as debugStreamModule from '../utils/debugStream'; import { LobeAnthropicAI } from './index'; @@ -16,6 +19,10 @@ beforeEach(() => { // 使用 vi.spyOn 来模拟 chat.completions.create 方法 vi.spyOn(instance['client'].messages, 'create').mockReturnValue(new ReadableStream() as any); + + vi.spyOn(instance['client'].beta.tools.messages, 'create').mockReturnValue({ + content: [], + } 
as any); }); afterEach(() => { @@ -233,6 +240,54 @@ describe('LobeAnthropicAI', () => { process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION = originalDebugValue; }); + describe('chat with tools', () => { + it('should call client.beta.tools.messages.create when tools are provided', async () => { + // Arrange + const tools: ChatCompletionTool[] = [ + { function: { name: 'tool1', description: 'desc1' }, type: 'function' }, + ]; + const spyOn = vi.spyOn(anthropicHelpers, 'buildAnthropicTools'); + + // Act + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + tools, + }); + + // Assert + expect(instance['client'].beta.tools.messages.create).toHaveBeenCalled(); + expect(spyOn).toHaveBeenCalledWith(tools); + }); + + it('should handle text and tool_use content correctly in transformResponseToStream', async () => { + // Arrange + const mockResponse = { + content: [ + { type: 'text', text: 'Hello' }, + { type: 'tool_use', id: 'tool1', name: 'tool1', input: 'input1' }, + ], + }; + // @ts-ignore + vi.spyOn(instance, 'transformResponseToStream').mockReturnValue(new ReadableStream()); + vi.spyOn(instance['client'].beta.tools.messages, 'create').mockResolvedValue( + mockResponse as any, + ); + + // Act + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 0, + tools: [{ function: { name: 'tool1', description: 'desc1' }, type: 'function' }], + }); + + // Assert + expect(instance['transformResponseToStream']).toHaveBeenCalledWith(mockResponse); + }); + }); + describe('Error', () => { it('should throw InvalidAnthropicAPIKey error on API_KEY_INVALID error', async () => { // Arrange @@ -305,5 +360,145 @@ describe('LobeAnthropicAI', () => { } }); }); + + describe('Error handling', () => { + it('should throw LocationNotSupportError on 403 error', async () => { + // Arrange + const apiError = { status: 403 }; + (instance['client'].messages.create as Mock).mockRejectedValue(apiError); + + // Act & Assert + await expect( + instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }), + ).rejects.toEqual({ + endpoint: 'https://api.anthropic.com', + error: apiError, + errorType: 'LocationNotSupportError', + provider, + }); + }); + + it('should throw AnthropicBizError on other error status codes', async () => { + // Arrange + const apiError = { status: 500 }; + (instance['client'].messages.create as Mock).mockRejectedValue(apiError); + + // Act & Assert + await expect( + instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }), + ).rejects.toEqual({ + endpoint: 'https://api.anthropic.com', + error: apiError, + errorType: 'AnthropicBizError', + provider, + }); + }); + + it('should desensitize custom baseURL in error message', async () => { + // Arrange + const apiError = { status: 401 }; + const customInstance = new LobeAnthropicAI({ + apiKey: 'test', + baseURL: 'https://api.custom.com/v1', + }); + vi.spyOn(customInstance['client'].messages, 'create').mockRejectedValue(apiError); + + // Act & Assert + await expect( + customInstance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 0, + }), + ).rejects.toEqual({ + endpoint: 'https://api.cu****om.com/v1', + error: apiError, + errorType: 'InvalidAnthropicAPIKey', + provider, + }); + }); + }); + + describe('Options', () => { + it('should pass signal 
to API call', async () => { + // Arrange + const controller = new AbortController(); + + // Act + await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }, + { signal: controller.signal }, + ); + + // Assert + expect(instance['client'].messages.create).toHaveBeenCalledWith( + expect.objectContaining({}), + { signal: controller.signal }, + ); + }); + + it('should apply callback to the returned stream', async () => { + // Arrange + const callback = vi.fn(); + + // Act + await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 0, + }, + { + callback: { onStart: callback }, + }, + ); + + // Assert + expect(callback).toHaveBeenCalled(); + }); + + it('should set headers on the response', async () => { + // Arrange + const headers = { 'X-Test-Header': 'test' }; + + // Act + const result = await instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'claude-3-haiku-20240307', + temperature: 1, + }, + { headers }, + ); + + // Assert + expect(result.headers.get('X-Test-Header')).toBe('test'); + }); + }); + + describe('Edge cases', () => { + it('should handle empty messages array', async () => { + // Act & Assert + await expect( + instance.chat({ + messages: [], + model: 'claude-3-haiku-20240307', + temperature: 1, + }), + ).resolves.toBeInstanceOf(Response); + }); + }); }); }); diff --git a/src/libs/agent-runtime/google/index.test.ts b/src/libs/agent-runtime/google/index.test.ts index 323738480730..0fc4a34179c8 100644 --- a/src/libs/agent-runtime/google/index.test.ts +++ b/src/libs/agent-runtime/google/index.test.ts @@ -1,4 +1,6 @@ // @vitest-environment edge-runtime +import { FunctionDeclarationSchemaType } from '@google/generative-ai'; +import { JSONSchema7 } from 'json-schema'; import OpenAI from 'openai'; import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; @@ -426,5 +428,157 @@ describe('LobeGoogleAI', () => { expect(model).toEqual('gemini-pro-vision'); }); }); + + describe('buildGoogleTools', () => { + it('should return undefined when tools is undefined or empty', () => { + expect(instance['buildGoogleTools'](undefined)).toBeUndefined(); + expect(instance['buildGoogleTools']([])).toBeUndefined(); + }); + + it('should correctly convert ChatCompletionTool to GoogleFunctionCallTool', () => { + const tools: OpenAI.ChatCompletionTool[] = [ + { + function: { + name: 'testTool', + description: 'A test tool', + parameters: { + type: 'object', + properties: { + param1: { type: 'string' }, + param2: { type: 'number' }, + }, + required: ['param1'], + }, + }, + type: 'function', + }, + ]; + + const googleTools = instance['buildGoogleTools'](tools); + + expect(googleTools).toHaveLength(1); + expect(googleTools![0].functionDeclarations![0]).toEqual({ + name: 'testTool', + description: 'A test tool', + parameters: { + type: FunctionDeclarationSchemaType.OBJECT, + properties: { + param1: { type: FunctionDeclarationSchemaType.STRING }, + param2: { type: FunctionDeclarationSchemaType.NUMBER }, + }, + required: ['param1'], + }, + }); + }); + }); + + describe('convertSchemaObject', () => { + it('should correctly convert object schema', () => { + const schema: JSONSchema7 = { + type: 'object', + properties: { + prop1: { type: 'string' }, + prop2: { type: 'number' }, + }, + }; + + const converted = instance['convertSchemaObject'](schema); + + expect(converted).toEqual({ + type: FunctionDeclarationSchemaType.OBJECT, + 
properties: { + prop1: { type: FunctionDeclarationSchemaType.STRING }, + prop2: { type: FunctionDeclarationSchemaType.NUMBER }, + }, + }); + }); + + // Similar test cases can be added for the array/string/number/boolean schema types + // ... + + it('should correctly convert nested schema', () => { + const schema: JSONSchema7 = { + type: 'object', + properties: { + nested: { + type: 'array', + items: { + type: 'object', + properties: { + prop: { type: 'string' }, + }, + }, + }, + }, + }; + + const converted = instance['convertSchemaObject'](schema); + + expect(converted).toEqual({ + type: FunctionDeclarationSchemaType.OBJECT, + properties: { + nested: { + type: FunctionDeclarationSchemaType.ARRAY, + items: { + type: FunctionDeclarationSchemaType.OBJECT, + properties: { + prop: { type: FunctionDeclarationSchemaType.STRING }, + }, + }, + }, + }, + }); + }); + }); + + describe('convertOAIMessagesToGoogleMessage', () => { + it('should correctly convert assistant message', () => { + const message: OpenAIChatMessage = { + role: 'assistant', + content: 'Hello', + }; + + const converted = instance['convertOAIMessagesToGoogleMessage'](message); + + expect(converted).toEqual({ + role: 'model', + parts: [{ text: 'Hello' }], + }); + }); + + it('should correctly convert user message', () => { + const message: OpenAIChatMessage = { + role: 'user', + content: 'Hi', + }; + + const converted = instance['convertOAIMessagesToGoogleMessage'](message); + + expect(converted).toEqual({ + role: 'user', + parts: [{ text: 'Hi' }], + }); + }); + + it('should correctly convert message with content parts', () => { + const message: OpenAIChatMessage = { + role: 'user', + content: [ + { type: 'text', text: 'Check this image:' }, + { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }, + ], + }; + + const converted = instance['convertOAIMessagesToGoogleMessage'](message); + + expect(converted).toEqual({ + role: 'user', + parts: [ + { text: 'Check this image:' }, + { inlineData: { data: '...', mimeType: 'image/png' } }, + ], + }); + }); + }); }); }); diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index e9984be47799..cf4c0689ac9b 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -247,11 +247,7 @@ export class LobeGoogleAI implements LobeRuntimeAI { return [ { - functionDeclarations: tools.map((tool) => { - const t = this.convertToolToGoogleTool(tool); - console.log('output Schema', t); - return t; - }), + functionDeclarations: tools.map((tool) => this.convertToolToGoogleTool(tool)), }, ]; } @@ -260,8 +256,6 @@ export class LobeGoogleAI implements LobeRuntimeAI { const functionDeclaration = tool.function; const parameters = functionDeclaration.parameters; - console.log('input Schema', JSON.stringify(parameters, null, 2)); - return { description: functionDeclaration.description, name: functionDeclaration.name, diff --git a/src/libs/agent-runtime/groq/index.test.ts b/src/libs/agent-runtime/groq/index.test.ts index e8363720fdb0..fda104e1faa2 100644 --- a/src/libs/agent-runtime/groq/index.test.ts +++ b/src/libs/agent-runtime/groq/index.test.ts @@ -71,6 +71,45 @@ describe('LobeGroqAI', () => { expect(result).toBeInstanceOf(Response); }); + describe('handlePayload option', () => { + it('should set stream to false when payload contains tools', async () => { + const mockCreateMethod = vi + .spyOn(instance['client'].chat.completions, 'create') + .mockResolvedValue({ + id: 'chatcmpl-8xDx5AETP8mESQN7UB30GxTN2H1SO', + object: 'chat.completion', + created: 
1709125675, + model: 'mistralai/mistral-7b-instruct:free', + system_fingerprint: 'fp_86156a94a0', + choices: [ + { + index: 0, + message: { role: 'assistant', content: 'hello' }, + logprobs: null, + finish_reason: 'stop', + }, + ], + }); + + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + tools: [ + { + type: 'function', + function: { name: 'tool1', description: '', parameters: {} }, + }, + ], + }); + + expect(mockCreateMethod).toHaveBeenCalledWith( + expect.objectContaining({ stream: false }), + expect.anything(), + ); + }); + }); + describe('Error', () => { it('should return OpenRouterBizError with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange diff --git a/src/libs/agent-runtime/ollama/index.test.ts b/src/libs/agent-runtime/ollama/index.test.ts new file mode 100644 index 000000000000..ca9596123ee1 --- /dev/null +++ b/src/libs/agent-runtime/ollama/index.test.ts @@ -0,0 +1,197 @@ +import { Ollama } from 'ollama/browser'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { AgentRuntimeErrorType } from '../error'; +import { ModelProvider } from '../types'; +import { AgentRuntimeError } from '../utils/createError'; +import { LobeOllamaAI } from './index'; + +vi.mock('ollama/browser'); + +describe('LobeOllamaAI', () => { + let ollamaAI: LobeOllamaAI; + + beforeEach(() => { + ollamaAI = new LobeOllamaAI({ baseURL: 'https://example.com' }); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + describe('constructor', () => { + it('should initialize Ollama client and baseURL with valid baseURL', () => { + expect(ollamaAI['client']).toBeInstanceOf(Ollama); + expect(ollamaAI.baseURL).toBe('https://example.com'); + }); + + it('should throw AgentRuntimeError with invalid baseURL', () => { + try { + new LobeOllamaAI({ baseURL: 'invalid-url' }); + } catch (e) { + expect(e).toEqual(AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidOllamaArgs)); + } + }); + }); + + describe('chat', () => { + it('should call Ollama client chat method and return StreamingResponse', async () => { + const chatMock = vi.fn().mockResolvedValue({}); + vi.mocked(Ollama.prototype.chat).mockImplementation(chatMock); + + const payload = { + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + }; + const options = { signal: new AbortController().signal }; + + const response = await ollamaAI.chat(payload as any, options); + + expect(chatMock).toHaveBeenCalledWith({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + options: { + frequency_penalty: undefined, + presence_penalty: undefined, + temperature: undefined, + top_p: undefined, + }, + stream: true, + }); + expect(response).toBeInstanceOf(Response); + }); + + it('should throw AgentRuntimeError when Ollama client chat method throws an error', async () => { + const errorMock = { + message: 'Chat error', + name: 'ChatError', + status_code: 500, + }; + vi.mocked(Ollama.prototype.chat).mockRejectedValue(errorMock); + + const payload = { + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + }; + + try { + await ollamaAI.chat(payload as any); + } catch (e) { + expect(e).toEqual( + AgentRuntimeError.chat({ + error: errorMock, + errorType: AgentRuntimeErrorType.OllamaBizError, + provider: ModelProvider.Ollama, + }), + ); + } + }); + + it('should abort the request when signal aborts', async () => { + const abortMock = vi.fn(); + 
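// a sketch of the assumed wiring inside LobeOllamaAI (not verified here):
+      // the runtime presumably registers an 'abort' listener on the provided
+      // signal, e.g. options?.signal?.addEventListener('abort', () => this.client.abort()),
+      // so dispatching 'abort' below should surface as a call to client.abort().
+      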
vi.mocked(Ollama.prototype.abort).mockImplementation(abortMock); + + const payload = { + messages: [{ content: 'Hello', role: 'user' }], + model: 'model-id', + }; + const options = { signal: new AbortController().signal }; + + ollamaAI.chat(payload as any, options); + + options.signal.dispatchEvent(new Event('abort')); + + expect(abortMock).toHaveBeenCalled(); + }); + }); + + describe('models', () => { + it('should call Ollama client list method and return ChatModelCard array', async () => { + const listMock = vi.fn().mockResolvedValue({ + models: [{ name: 'model-1' }, { name: 'model-2' }], + }); + vi.mocked(Ollama.prototype.list).mockImplementation(listMock); + + const models = await ollamaAI.models(); + + expect(listMock).toHaveBeenCalled(); + expect(models).toEqual([{ id: 'model-1' }, { id: 'model-2' }]); + }); + }); + + describe('buildOllamaMessages', () => { + it('should convert OpenAIChatMessage array to OllamaMessage array', () => { + const messages = [ + { content: 'Hello', role: 'user' }, + { content: 'Hi there!', role: 'assistant' }, + ]; + + const ollamaMessages = ollamaAI['buildOllamaMessages'](messages as any); + + expect(ollamaMessages).toEqual([ + { content: 'Hello', role: 'user' }, + { content: 'Hi there!', role: 'assistant' }, + ]); + }); + }); + + describe('convertContentToOllamaMessage', () => { + it('should convert string content to OllamaMessage', () => { + const message = { content: 'Hello', role: 'user' }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ content: 'Hello', role: 'user' }); + }); + + it('should convert text content to OllamaMessage', () => { + const message = { + content: [{ type: 'text', text: 'Hello' }], + role: 'user', + }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ content: 'Hello', role: 'user' }); + }); + + it('should convert image_url content to OllamaMessage with images', () => { + const message = { + content: [ + { + type: 'image_url', + image_url: { url: 'data:image/png;base64,abc123' }, + }, + ], + role: 'user', + }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ + content: '', + role: 'user', + images: ['abc123'], + }); + }); + + it('should ignore invalid image_url content', () => { + const message = { + content: [ + { + type: 'image_url', + image_url: { url: 'invalid-url' }, + }, + ], + role: 'user', + }; + + const ollamaMessage = ollamaAI['convertContentToOllamaMessage'](message as any); + + expect(ollamaMessage).toEqual({ + content: '', + role: 'user', + }); + }); + }); +}); diff --git a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts index 621bd94f8583..df2ae76aed44 100644 --- a/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts +++ b/src/libs/agent-runtime/utils/openaiCompatibleFactory/index.test.ts @@ -30,13 +30,6 @@ const LobeMockProvider = LobeOpenAICompatibleFactory({ if (error.status === 403) return { error, errorType: AgentRuntimeErrorType.LocationNotSupportError }; }, - handlePayload: (payload) => { - return { - ...payload, - // disable stream for tools due to groq dont support - stream: !payload.tools, - } as any; - }, }, debug: { chatCompletion: () => process.env.DEBUG_MOCKPROVIDER_CHAT_COMPLETION === '1', @@ -71,7 +64,7 @@ describe('LobeOpenAICompatibleFactory', () => { }); describe('chat', () => { - 
it('should return a StreamingTextResponse on successful API call', async () => {
+    it('should return a Response on successful API call', async () => {
       // Arrange
       const mockStream = new ReadableStream();
       const mockResponse = Promise.resolve(mockStream);
@@ -120,6 +113,131 @@ describe('LobeOpenAICompatibleFactory', () => {
       expect(result).toBeInstanceOf(Response);
     });
 
+    describe('streaming response', () => {
+      it('should handle multiple data chunks correctly', async () => {
+        const mockStream = new ReadableStream({
+          start(controller) {
+            controller.enqueue({
+              id: 'a',
+              object: 'chat.completion.chunk',
+              created: 1709125675,
+              model: 'mistralai/mistral-7b-instruct:free',
+              system_fingerprint: 'fp_86156a94a0',
+              choices: [
+                { index: 0, delta: { content: 'hello' }, logprobs: null, finish_reason: null },
+              ],
+            });
+            controller.close();
+          },
+        });
+        vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
+          mockStream as any,
+        );
+
+        const result = await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+
+        const decoder = new TextDecoder();
+        const reader = result.body!.getReader();
+        expect(decoder.decode((await reader.read()).value)).toEqual('id: a\n');
+        expect(decoder.decode((await reader.read()).value)).toEqual('event: text\n');
+        expect(decoder.decode((await reader.read()).value)).toEqual('data: "hello"\n\n');
+        expect((await reader.read()).done).toBe(true);
+      });
+
+      it('should transform non-streaming response to stream correctly', async () => {
+        const mockResponse: OpenAI.ChatCompletion = {
+          id: 'a',
+          object: 'chat.completion',
+          created: 123,
+          model: 'mistralai/mistral-7b-instruct:free',
+          choices: [
+            {
+              index: 0,
+              message: { role: 'assistant', content: 'Hello' },
+              finish_reason: 'stop',
+              logprobs: null,
+            },
+          ],
+          usage: {
+            prompt_tokens: 5,
+            completion_tokens: 5,
+            total_tokens: 10,
+          },
+        };
+        vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue(
+          mockResponse as any,
+        );
+
+        const result = await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+          stream: false,
+        });
+
+        const decoder = new TextDecoder();
+
+        const reader = result.body!.getReader();
+        expect(decoder.decode((await reader.read()).value)).toContain('id: a\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('event: text\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('data: "Hello"\n\n');
+
+        expect(decoder.decode((await reader.read()).value)).toContain('id: a\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('event: text\n');
+        expect(decoder.decode((await reader.read()).value)).toContain('');
+
+        expect((await reader.read()).done).toBe(true);
+      });
+    });
+
+    describe('handlePayload option', () => {
+      it('should modify request payload correctly', async () => {
+        const mockCreateMethod = vi.spyOn(instance['client'].chat.completions, 'create');
+
+        await instance.chat({
+          messages: [{ content: 'Hello', role: 'user' }],
+          model: 'mistralai/mistral-7b-instruct:free',
+          temperature: 0,
+        });
+
+        expect(mockCreateMethod).toHaveBeenCalledWith(
+          expect.objectContaining({
+            // add assertions here based on the actual handlePayload implementation
+          }),
+          expect.anything(),
+        );
+      });
+    });
+
+    describe('cancel request', () => {
+      it('should cancel ongoing request correctly', async () => {
+        const controller = new AbortController();
+        const mockCreateMethod = 
vi.spyOn(instance['client'].chat.completions, 'create'); + + instance.chat( + { + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }, + { signal: controller.signal }, + ); + + controller.abort(); + + expect(mockCreateMethod).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + signal: controller.signal, + }), + ); + }); + }); + describe('Error', () => { it('should return bizErrorType with an openai error response when OpenAI.APIError is thrown', async () => { // Arrange @@ -233,6 +351,27 @@ describe('LobeOpenAICompatibleFactory', () => { } }); + describe('handleError option', () => { + it('should return correct error type for 403 status code', async () => { + const error = { status: 403 }; + vi.spyOn(instance['client'].chat.completions, 'create').mockRejectedValue(error); + + try { + await instance.chat({ + messages: [{ content: 'Hello', role: 'user' }], + model: 'mistralai/mistral-7b-instruct:free', + temperature: 0, + }); + } catch (e) { + expect(e).toEqual({ + error, + errorType: AgentRuntimeErrorType.LocationNotSupportError, + provider, + }); + } + }); + }); + it('should throw an InvalidOpenRouterAPIKey error type on 401 status code', async () => { // Mock the API call to simulate a 401 error const error = new Error('Unauthorized') as any; From b047563456803ae06bc9b859ad85e60314ff63c8 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Sat, 11 May 2024 20:25:31 +0800 Subject: [PATCH 21/24] =?UTF-8?q?=E2=9C=85=20test:=20add=20more=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../__snapshots__/index.test.ts.snap | 886 ++ .../togetherai/fixtures/models.json | 8111 +++++++++++++++++ .../agent-runtime/togetherai/index.test.ts | 16 +- .../utils/anthropicHelpers.test.ts | 119 + .../agent-runtime/utils/debugStream.test.ts | 70 + .../utils/streams/anthropic.test.ts | 197 + .../utils/streams/bedrock/llama.test.ts | 196 + .../utils/streams/google-ai.test.ts | 97 + .../utils/streams/ollama.test.ts | 77 + .../utils/streams/openai.test.ts | 263 + src/store/tool/slices/builtin/action.test.ts | 90 + src/utils/fetch.test.ts | 155 +- src/utils/fetch.ts | 3 +- 13 files changed, 10277 insertions(+), 3 deletions(-) create mode 100644 src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap create mode 100644 src/libs/agent-runtime/togetherai/fixtures/models.json create mode 100644 src/libs/agent-runtime/utils/debugStream.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/anthropic.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/google-ai.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/ollama.test.ts create mode 100644 src/libs/agent-runtime/utils/streams/openai.test.ts create mode 100644 src/store/tool/slices/builtin/action.test.ts diff --git a/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap new file mode 100644 index 000000000000..fb029458090a --- /dev/null +++ b/src/libs/agent-runtime/togetherai/__snapshots__/index.test.ts.snap @@ -0,0 +1,886 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`LobeTogetherAI > models > should get models 1`] = ` +[ + { + "description": "This model is a 75/25 merge of Chronos (13B) and Nous Hermes (13B) models resulting in having a great ability to produce evocative storywriting and follow a 
narrative.", + "displayName": "Chronos Hermes (13B)", + "enabled": false, + "functionCall": false, + "id": "Austism/chronos-hermes-13b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "MythoLogic-L2 and Huginn merge using a highly experimental tensor type merge technique. The main difference with MythoMix is that I allowed more of Huginn to intermingle with the single tensors located at the front and end of a model", + "displayName": "MythoMax-L2 (13B)", + "enabled": false, + "functionCall": false, + "id": "Gryphe/MythoMax-L2-13b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "first Nous collection of dataset and models made by fine-tuning mostly on data created by Nous in-house", + "displayName": "Nous Capybara v1.9 (7B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Capybara-7B-V1p9", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Nous Hermes 2 on Mistral 7B DPO is the new flagship 7B Hermes! This model was DPO'd from Teknium/OpenHermes-2.5-Mistral-7B and has improved across the board on all benchmarks tested - AGIEval, BigBench Reasoning, GPT4All, and TruthfulQA.", + "displayName": "Nous Hermes 2 - Mistral DPO (7B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "displayName": "Nous Hermes 2 - Mixtral 8x7B-DPO ", + "enabled": true, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Nous Hermes 2 Mixtral 7bx8 SFT is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. 
The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "displayName": "Nous Hermes 2 - Mixtral 8x7B-SFT", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune", + "displayName": "Nous Hermes-2 Yi (34B)", + "enabled": true, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-2-Yi-34B", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "displayName": "Nous Hermes Llama-2 (13B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-Llama2-13b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Nous-Hermes-Llama2-7b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "displayName": "Nous Hermes LLaMA-2 (7B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-llama-2-7b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "An OpenOrca dataset fine-tune on top of Mistral 7B by the OpenOrca team.", + "displayName": "OpenOrca Mistral (7B) 8K", + "enabled": false, + "functionCall": false, + "id": "Open-Orca/Mistral-7B-OpenOrca", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (0.5B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-0.5B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (1.8B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-1.8B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (110B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-110B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (14B)", + "enabled": true, + "functionCall": false, + "id": "Qwen/Qwen1.5-14B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (32B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-32B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (4B)", + "enabled": false, + "functionCall": false, + "id": "Qwen/Qwen1.5-4B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (72B)", + "enabled": true, + "functionCall": false, + "id": "Qwen/Qwen1.5-72B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "displayName": "Qwen 1.5 Chat (7B)", + "enabled": true, + "functionCall": false, + "id": "Qwen/Qwen1.5-7B-Chat", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Arctic is a dense-MoE Hybrid transformer architecture pre-trained from scratch by the Snowflake AI Research Team.", + "displayName": "Snowflake Arctic Instruct", + "enabled": false, + "functionCall": false, + "id": "Snowflake/snowflake-arctic-instruct", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Re:MythoMax (ReMM) is a recreation trial of the original MythoMax-L2-B13 with updated models. 
This merge use SLERP [TESTING] to merge ReML and Huginn v1.2.", + "displayName": "ReMM SLERP L2 (13B)", + "enabled": false, + "functionCall": false, + "id": "Undi95/ReMM-SLERP-L2-13B", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "A merge of models built by Undi95 with the new task_arithmetic merge method from mergekit.", + "displayName": "Toppy M (7B)", + "enabled": false, + "functionCall": false, + "id": "Undi95/Toppy-M-7B", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities", + "displayName": "WizardLM v1.2 (13B)", + "enabled": false, + "functionCall": false, + "id": "WizardLM/WizardLM-13B-V1.2", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "The OLMo models are trained on the Dolma dataset", + "displayName": "OLMo Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "allenai/OLMo-7B-Instruct", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (13B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-13b-Instruct-hf", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (34B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-34b-Instruct-hf", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (70B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-70b-Instruct-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "codellama/CodeLlama-7b-Instruct-hf", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "This Dolphin is really good at coding, I trained with a lot of coding data. It is very obedient but it is not DPO tuned - so you still might need to encourage it in the system prompt as I show in the below examples.", + "displayName": "Dolphin 2.5 Mixtral 8x7b", + "enabled": false, + "functionCall": false, + "id": "cognitivecomputations/dolphin-2.5-mixtral-8x7b", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "DBRX Instruct is a mixture-of-experts (MoE) large language model trained from scratch by Databricks. 
DBRX Instruct specializes in few-turn interactions.", + "displayName": "DBRX Instruct", + "enabled": false, + "functionCall": false, + "id": "databricks/dbrx-instruct", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.", + "displayName": "Deepseek Coder Instruct (33B)", + "enabled": true, + "functionCall": false, + "id": "deepseek-ai/deepseek-coder-33b-instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese", + "displayName": "DeepSeek LLM Chat (67B)", + "enabled": false, + "functionCall": false, + "id": "deepseek-ai/deepseek-llm-67b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "An instruction fine-tuned LLaMA-2 (70B) model by merging Platypus2 (70B) by garage-bAInd and LLaMA-2 Instruct v2 (70B) by upstage.", + "displayName": "Platypus2 Instruct (70B)", + "enabled": false, + "functionCall": false, + "id": "garage-bAInd/Platypus2-70B-instruct", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "displayName": "Gemma Instruct (2B)", + "enabled": true, + "functionCall": false, + "id": "google/gemma-2b-it", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "displayName": "Gemma Instruct (7B)", + "enabled": true, + "functionCall": false, + "id": "google/gemma-7b-it", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna v1.5 (13B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-13b-v1.5", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna v1.5 (7B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-7b-v1.5", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (13B)", + "enabled": true, + "functionCall": false, + "id": "meta-llama/Llama-2-13b-chat-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (70B)", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-2-70b-chat-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-2-7b-chat-hf", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Meta Llama 3 70B Instruct", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-3-70b-chat-hf", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Meta Llama 3 8B Instruct", + "enabled": false, + "functionCall": false, + "id": "meta-llama/Llama-3-8b-chat-hf", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "WizardLM-2 8x22B is Wizard's most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models.", + "displayName": "WizardLM-2 (8x22B)", + "enabled": false, + "functionCall": false, + "id": "microsoft/WizardLM-2-8x22B", + "maxOutput": 65536, + "tokens": 65536, + "vision": false, + }, + { + "description": "instruct fine-tuned version of Mistral-7B-v0.1", + "displayName": "Mistral (7B) Instruct", + "enabled": false, + "functionCall": false, + "id": "mistralai/Mistral-7B-Instruct-v0.1", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.", + "displayName": "Mistral (7B) Instruct v0.2", + "enabled": false, + "functionCall": false, + "id": "mistralai/Mistral-7B-Instruct-v0.2", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.", + "displayName": "Mixtral-8x22B Instruct v0.1", + "enabled": false, + "functionCall": false, + "id": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "maxOutput": 65536, + "tokens": 65536, + "vision": false, + }, + { + "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "displayName": "Mixtral-8x7B Instruct v0.1", + "enabled": true, + "functionCall": false, + "id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "A merge of OpenChat 3.5 was trained with C-RLFT on a collection of publicly available high-quality instruction data, with a custom processing pipeline.", + "displayName": "OpenChat 3.5", + "enabled": false, + "functionCall": false, + "id": "openchat/openchat-3.5-1210", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "A state-of-the-art model by Snorkel AI, DPO fine-tuned on Mistral-7B", + "displayName": "Snorkel Mistral PairRM DPO (7B)", + "enabled": false, + "functionCall": false, + "id": "snorkelai/Snorkel-Mistral-PairRM-DPO", + "maxOutput": 32768, + 
"tokens": 32768, + "vision": false, + }, + { + "description": "State of the art Mistral Fine-tuned on extensive public datasets", + "displayName": "OpenHermes-2-Mistral (7B)", + "enabled": false, + "functionCall": false, + "id": "teknium/OpenHermes-2-Mistral-7B", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Continuation of OpenHermes 2 Mistral model trained on additional code datasets", + "displayName": "OpenHermes-2.5-Mistral (7B)", + "enabled": false, + "functionCall": false, + "id": "teknium/OpenHermes-2p5-Mistral-7B", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations, instruction tuned by Together", + "displayName": "LLaMA-2-7B-32K-Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Llama-2-7B-32K-Instruct", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-7B-v1 base model.", + "displayName": "RedPajama-INCITE Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/RedPajama-INCITE-7B-Chat", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.", + "displayName": "RedPajama-INCITE Chat (3B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", + "displayName": "StripedHyena Nous (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/StripedHyena-Nous-7B", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ", + "displayName": "Alpaca (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/alpaca-7b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "displayName": "Upstage SOLAR Instruct v1 (11B)", + "enabled": false, + "functionCall": false, + "id": "upstage/SOLAR-10.7B-Instruct-v1.0", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "displayName": "01-ai Yi Chat (34B)", + "enabled": true, + "functionCall": false, + "id": "zero-one-ai/Yi-34B-Chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. 
The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Llama3 8B Chat HF INT4", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Llama-3-8b-chat-hf-int4", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "displayName": "Togethercomputer Llama3 8B Instruct Int8", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Llama-3-8b-chat-hf-int8", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.", + "displayName": "Pythia-Chat-Base (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Pythia-Chat-Base-7B-v0.16", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "displayName": "MPT-Chat (30B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/mpt-30b-chat", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "displayName": "Koala (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Koala-7B", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "displayName": "Dolly v2 (12B)", + "enabled": false, + "functionCall": false, + "id": "databricks/dolly-v2-12b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "displayName": "Dolly v2 (3B)", + "enabled": false, + "functionCall": false, + "id": "databricks/dolly-v2-3b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "displayName": "Guanaco (65B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-65b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna-FastChat-T5 (3B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/fastchat-t5-3b-v1.0", + "maxOutput": 512, + "tokens": 512, + "vision": false, + }, + { + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. 
", + "displayName": "Open-Assistant StableLM SFT-7 (7B)", + "enabled": false, + "functionCall": false, + "id": "OpenAssistant/stablelm-7b-sft-v7-epoch-3", + "maxOutput": 4096, + "tokens": 4096, + "vision": true, + }, + { + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "displayName": "MPT-Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/mpt-7b-chat", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", + "displayName": "Open-Assistant Pythia SFT-4 (12B)", + "enabled": false, + "functionCall": false, + "id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + "maxOutput": 2048, + "tokens": 2048, + "vision": true, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", + "displayName": "Vicuna v1.3 (7B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-7b-v1.3", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "displayName": "Nous Hermes LLaMA-2 (70B)", + "enabled": false, + "functionCall": false, + "id": "NousResearch/Nous-Hermes-Llama2-70b", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "displayName": "Vicuna v1.5 16K (13B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-13b-v1.5-16k", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.", + "displayName": "GPT-NeoXT-Chat-Base (20B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.", + "displayName": "Zephyr-7B-ß", + "enabled": false, + "functionCall": false, + "id": "HuggingFaceH4/zephyr-7b-beta", + "maxOutput": 32768, + "tokens": 32768, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/CodeLlama-7b-Instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "displayName": "Guanaco (13B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-13b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (70B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/llama-2-70b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (34B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/CodeLlama-34b-Instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "displayName": "Code Llama Instruct (13B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/CodeLlama-13b-Instruct", + "maxOutput": 16384, + "tokens": 16384, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (13B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/llama-2-13b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", + "displayName": "Vicuna v1.3 (13B)", + "enabled": false, + "functionCall": false, + "id": "lmsys/vicuna-13b-v1.3", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Fine-tuned from StarCoder to act as a helpful coding assistant. As an alpha release is only intended for educational or research purpopses.", + "displayName": "StarCoderChat Alpha (16B)", + "enabled": false, + "functionCall": false, + "id": "HuggingFaceH4/starchat-alpha", + "maxOutput": 8192, + "tokens": 8192, + "vision": false, + }, + { + "description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "displayName": "Dolly v2 (7B)", + "enabled": false, + "functionCall": false, + "id": "databricks/dolly-v2-7b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "displayName": "Guanaco (33B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-33b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "displayName": "Koala (13B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/Koala-13B", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, + { + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "displayName": "LLaMA-2 Chat (7B)", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/llama-2-7b-chat", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "displayName": "Upstage SOLAR Instruct v1 (11B)-Int4", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4", + "maxOutput": 4096, + "tokens": 4096, + "vision": false, + }, + { + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ", + "displayName": "Guanaco (7B) ", + "enabled": false, + "functionCall": false, + "id": "togethercomputer/guanaco-7b", + "maxOutput": 2048, + "tokens": 2048, + "vision": false, + }, +] +`; diff --git a/src/libs/agent-runtime/togetherai/fixtures/models.json b/src/libs/agent-runtime/togetherai/fixtures/models.json new file mode 100644 index 000000000000..561197f99bb3 --- /dev/null +++ b/src/libs/agent-runtime/togetherai/fixtures/models.json @@ -0,0 +1,8111 @@ +[ + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e831864b84b428b8d322d0", + "name": "Austism/chronos-hermes-13b", + "display_name": "Chronos Hermes (13B)", + "display_type": "chat", + "description": "This model is a 75/25 merge of Chronos (13B) and Nous Hermes (13B) models resulting in having a great ability to produce evocative storywriting and follow a narrative.", + "license": "other", + "creator_organization": "Austism", + "hardware_label": "2x A100 80GB", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x6966f4A2caf8efaE98C251C3C15210333578C158": 1 }, + "asks_updated": "2024-05-11T12:20:53.91543414Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 34.53333333333333, + "throughput_out": 0.5333333333333333, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.043478260869565216, + "qps": 0.06666666666666667, + "throughput_in": 34.53333333333333, + "throughput_out": 0.5333333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6560b993b56cf1e0970c9b1a", + "name": "BAAI/bge-base-en-v1.5", + "display_name": "BAAI-Bge-Base-1p5", + "display_type": "embedding", + "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding", + "license": "MIT", + 
"creator_organization": "BAAI", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 109482240, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-24T14:56:19.475Z", + "update_at": "2023-12-22T03:26:23.802Z", + "instances": [ + { "avzone": "us-central-2a", "cluster": "jollyllama" }, + { "avzone": "us-central-1a", "cluster": "sassyseal" } + ], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x18530141Cf50876b091f3D4B9FA3Bb7F7d24d20a": 1, + "0x4Aa34b8d92E163D7d7527e17B92Bc83C2F7149a3": 1, + "0x8BEE38fD0697C19F06411AaEEea935073005168c": 1, + "0xe2d9B1fd3EfBA3fEB7cfc84FD5d9c1621dA3dEB9": 1 + }, + "asks_updated": "2024-05-11T03:12:34.75168084Z", + "gpus": { "": 0 }, + "qps": 3.0666666666666664, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 254, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.008075842696629214, + "qps": 1.7333333333333334, + "throughput_in": 137.2, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.008046875, + "qps": 1.3333333333333333, + "throughput_in": 116.8, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6560b938b56cf1e0970c9b19", + "name": "BAAI/bge-large-en-v1.5", + "display_name": "BAAI-Bge-Large-1p5", + "display_type": "embedding", + "description": "bge is short for BAAI general embedding, it maps any text to a low-dimensional dense vector using FlagEmbedding", + "license": "MIT", + "creator_organization": "BAAI", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 335141888, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 4, "output": 4, "finetune": 0, "base": 0 }, + "created_at": "2023-11-24T14:54:48.986Z", + "update_at": "2023-12-22T03:27:18.465Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x5ED0BA75594E3429628087603D628838bE686ebF": 1, + "0x7153b499cA3C6cc2Bb60Dd5DBF8ba0C6B2532c63": 1, + "0xD2a55c4769d98e7Df019A3858FA37036BbbAB5cE": 1, + "0xF6122ecAc4D8d96a95E00d6eC8a838f4525D8124": 1 + }, + "asks_updated": "2024-05-11T03:00:56.495347114Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f78861d683768020b9f005", + "name": "Gryphe/MythoMax-L2-13b", + "display_name": "MythoMax-L2 (13B)", + "display_type": "chat", + "description": "MythoLogic-L2 and Huginn merge using a highly experimental tensor type merge technique. 
The main difference with MythoMix is that I allowed more of Huginn to intermingle with the single tensors located at the front and end of a model", + "license": "other", + "creator_organization": "Gryphe", + "hardware_label": "1x A40 48GB", + "num_parameters": 13000000000, + "release_date": "2023-08-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": [""], + "add_generation_prompt": true, + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-05T19:58:25.683Z", + "update_at": "2023-09-05T19:58:25.683Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 30, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x007fAfa7e8774c40929B946474B0de5288eC6C41": 1, + "0x037DBdcEDb5C34a4fcB41Ab8AaD56b5815bE02DE": 1, + "0x05a4E02cc4748e92338DCE88e22D81374fD300C9": 1, + "0x17957d0c98323Cec3B42BA4a5C0503C5B7114317": 1, + "0x1C28d22406B7acff59f57120DcF98685fed4E6d1": 1, + "0x2Da6d7d2f5810221C572Dea0A4C56D117913ba60": 1, + "0x2F84CaD2c29FAf002787cBc27A7749871dB843F5": 1, + "0x50CA731E79882f073e0550c7B4177EF21A20226b": 1, + "0x705CE19b5A6BfA9739Ce9160B1DCcaD9c83D9D7e": 1, + "0x7101FDCAa53c7E8fF969F4A5Bab72311A9f1a1cf": 1, + "0x7986A72CA1d6dE9bD9b1e0ec349a13c92678193b": 1, + "0x80Ec6D391649f097c1af115be95f5e67EDD4C86E": 1, + "0x80c2a4602548641b57f48504Ac182e13b2895b87": 1, + "0x844EE8641055BDc3A4D448782E0B2e582688cF7c": 1, + "0x866abAD0f44b6C608DF925b864d73D0b0eCb6FAb": 1, + "0x8993bDAC643F3500a20c0DdA18af1f6535840aF6": 1, + "0x8ef1AD0c945EDD56CE215c751c4d59BE6e7Ba8E5": 1, + "0x9C10b5fe06098EE4475c055A598b03D8AE228B1B": 1, + "0x9D76E8FD91d1Ccf7B19e1AbE10144f2721eA5E8F": 1, + "0xA059d967aFA12e9B85eC2ABF7930D09aefe789E8": 1, + "0xA5CEf1fA8Dd68B4A185CD38903B0CDfFA343182a": 1, + "0xA5De493e5FC052EB73126c793d73888a72Ba7BeC": 1, + "0xB53B799b1bF6B2cAd3fe831FE54fEC5fF0E13fcC": 1, + "0xC28d7EF7781A76ba9486E0e1C143CacF2A88d2C5": 1, + "0xD613d94dd0aE5d761eDc0f27c8d01F52439B4036": 1, + "0xaf9Db30c926d4e48c1314452Ed7C83b05B4a071b": 1, + "0xb4B6C15Dd02deE2a6b9532FD9517bF6e5CA8f313": 1, + "0xb6F4896c742DdC850AE9f0070B66F1F4608fe397": 1, + "0xd2D502b3e57cA8D2E99B807FbE1930f9eE6F44d0": 1, + "0xe58DA790EF3BA3FfbD032b3f7f13101De142C68e": 1 + }, + "asks_updated": "2024-05-11T00:54:42.711586793Z", + "gpus": { "": 0 }, + "qps": 13.666666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 21538.133333333335, + "throughput_out": 1850.3333333333333, + "error_rate": 0.06666666666666667, + "retry_rate": 0.13333333333333333, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.048909090909090264, + "qps": 13.666666666666666, + "throughput_in": 21538.133333333335, + "throughput_out": 1850.3333333333333, + "error_rate": 0.06666666666666667, + "retry_rate": 0.13333333333333333 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6573540efe9a5c79871809ce", + "name": "Meta-Llama/Llama-Guard-7b", + "display_name": "Llama Guard (7B)", + "display_type": "moderation", + 
"description": "Llama Guard: LLM-based Input-Output Safeguard for Human-AI Conversations", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/LlamaGuard-7b", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "chat_template": "{{ '[INST] Task: Check if there is unsafe content in \\'' }}{% if messages[messages.length-1]['role'] == 'user' %}{{ 'User' }}{% else %}{{ 'Agent' }}{% endif %}{{ '\\' messages in conversations according our safety policy with the below categories.\n\n\nO1: Violence and Hate. \nShould not\n - Help people plan or engage in violence.\n - Encourage people to engage in violence.\n - Express hateful, derogatory, or demeaning sentiments against people based on sensitive personal characteristics like their race, color, religion, national origin, sexual orientation, gender, gender identity, or disability.\n - Encourage discrimination against people based on those characteristics and should not use slurs or create insults based on those characteristics.\nCan\n - Provide information on violence and discrimination.\n - Discuss the topics of hate and violence, and can discuss historical events involving violence.\nO2: Sexual Content. \nShould not\n - Engage in sexually explicit (i.e., erotic) conversations and should not encourage people to engage in sexual activities.\nCan\n - Discuss topics related to sexuality and sexual education.\n - Provide general information regarding sexual activities.\n - Provide guidance on topics related to sexual health.\nO3: Criminal Planning. \nShould not\n - Help people plan or engage in criminal activities like theft, kidnapping, or financial crimes, especially when the person indicates that they may act on the information provided to them.\n - Encourage people to engage in criminal activities or condone criminal behavior.\nCan\n - Help people understand how crimes are committed, provided that the response is not so detailed that it could be used to provide a roadmap for future crimes.\n - Help people understand how systems work that are used to prevent crimes, provided those descriptions do not provide detailed information on how to defeat those systems.\nO4: Guns and Illegal Weapons. \nShould not\n - Assist in the planning or execution of firearm or illegal weapons crimes, especially when the person indicates that they may act on the information provided to them.\n - Encourage people to engage in firearm or illegal weapons crimes or condone such behavior.\nCan\n - Discuss firearms and the arguments for and against firearm ownership.\nO5: Regulated or Controlled Substances. \nShould not\n - Encourage or assist people in the trafficking, consumption, or use of regulated or controlled substances, including but not limited to illegal drugs, tobacco, alcohol, and cannabis.\n - Assist or encourage people to create such substances when it is illegal to do so.\nCan\n - Discuss the use of regulated or controlled substances, and can provide general information on those substances (ex: what their chemical composition is and what their effects are).\n - Provide information on how those substances are manufactured, provided that the information is not so detailed as to provide step-by-step instructions for their creation.\nO6: Self-Harm. 
\nShould not\n - Encourage people to harm themselves.\n - Romanticize or condone self-harm or suicide.\n - Provide information regarding the methods of suicide or self-harm.\n - Provide any information or advice, other than information regarding appropriate health resources, to people who express or imply that they wish to harm themselves.\nCan\n - Provide general information regarding self-harm and suicide (ex: statistics, the mental health challenges faced by those who take these actions).\nShould\n - Provide information regarding appropriate health resources to people who imply or express the desire to harm themselves.\n\n\n\n\n' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% else %}{{ 'Agent: ' + message['content'] + '\n\n' }}{% endif %}{% endfor %}{{ '\n\nProvide your safety assessment for '}}{% if messages[messages.length-1]['role'] == 'user' %}{{ 'User' }}{% else %}{{ 'Agent' }}{% endif %}{{' in the above conversation:\n - First line must read \\'safe\\' or \\'unsafe\\'.\n - If unsafe, a second line must include a comma-separated list of violated categories. [/INST]' }}", + "safety_label": "safe", + "safe_response": true, + "safety_config": { "max_tokens": 64 }, + "safety_categories": { + "O1": "Violence and Hate", + "O2": "Sexual Content", + "O3": "Criminal Planning", + "O4": "Guns and Illegal Weapons", + "O5": "Regulated or Controlled Substances", + "O6": "Self-Harm" + } + }, + "pricing": { "input": 6, "output": 6, "hourly": 0 }, + "update_at": "2024-04-20T23:25:17.775Z", + "instances": [ + { "avzone": "us-central-5a", "cluster": "wrigleycub" }, + { "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }, + { "avzone": "us-east-2a", "cluster": "jumpyjackal" } + ], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x4Af456F8E15A15082e24E434Ad794ad9387C7169": 1, + "0x4ceB37C5700106874aA40B8DA6b7349Ab7627643": 1, + "0x7Cfb4b7470B07154eA0802dAC8f626b0F5b89faE": 1, + "0xE3bc0e43e4d3Ff1C6942C6134CfB7496A273eCdA": 1 + }, + "asks_updated": "2024-05-11T11:46:46.414181302Z", + "gpus": { "": 0 }, + "qps": 23.066666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 27473.200000000004, + "throughput_out": 52.53333333333333, + "retry_rate": 1, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.10809523809523812, + "qps": 8.466666666666667, + "throughput_in": 10082, + "throughput_out": 18.933333333333334, + "error_rate": 0, + "retry_rate": 0.26666666666666666 + }, + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.13665644171779157, + "qps": 7.466666666666667, + "throughput_in": 9073.333333333334, + "throughput_out": 17.533333333333335, + "error_rate": 0, + "retry_rate": 0.4 + }, + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.24161735700197307, + "qps": 7.133333333333334, + "throughput_in": 8317.866666666667, + "throughput_out": 16.066666666666666, + "error_rate": 0, + "retry_rate": 0.3333333333333333 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "656f5aac044c74c554a30c4f", + "name": "Nexusflow/NexusRaven-V2-13B", + "display_name": "NexusRaven (13B)", + "display_type": "language", + "description": "NexusRaven is an open-source and commercially viable function calling LLM that surpasses the state-of-the-art in function calling 
capabilities.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/Nexusflow/NexusRaven-V2-13B", + "creator_organization": "Nexusflow", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-12-05T17:15:24.561Z", + "update_at": "2023-12-05T17:15:24.561Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x60e899d1504136B312ebac78CCeCA47Dd62Bd267": 1, + "0x66D3F099533df45Dc154e9D10b95B1bcF1f08a03": 1, + "0x932Becec6BD385C4607889D7Ed159212A0e732F2": 1, + "0xC0251a8dB9B86a149E38c88F46912EdA9Df9f346": 1, + "0xE55822B5482FeE8B805Ad51F47f973270c8AEDe5": 1, + "0xFd1bFB3A51138c37C6f8F57D4F7AA2f2911d8CAf": 1 + }, + "asks_updated": "2024-05-10T17:13:11.525066416Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 1, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65664e4d79fe5514beebd5d3", + "name": "NousResearch/Nous-Capybara-7B-V1p9", + "display_name": "Nous Capybara v1.9 (7B)", + "display_type": "chat", + "description": "first Nous collection of dataset and models made by fine-tuning mostly on data created by Nous in-house", + "license": "MIT", + "creator_organization": "NousResearch", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 7241732096, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "add_generation_prompt": true, + "stop": ["USER:", "ASSISTANT:"], + "prompt_format": "USER:\n{prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %} {{ 'USER:\n' + message['content'] + '\n' }}{% elif message['role'] == 'system' %}{{ 'SYSTEM:\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT:\n' + message['content'] + '\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:\n' }}{% endif %}{% endfor %}" + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-11-28T20:32:13.026Z", + "update_at": "2023-11-28T20:33:03.163Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x88eB978d91199D40cB23871d4319d382EF40492D": 1, + "0xa6C19366D1A480921d66ec924B3513DB8F77781d": 1 + }, + "asks_updated": "2024-05-11T02:43:01.448420782Z", + "gpus": { "": 0 }, + "qps": 0.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 898.0666666666667, + "throughput_out": 36.2, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.35555555555555546, + "qps": 0.6, + "throughput_in": 898.0666666666667, + "throughput_out": 36.2, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": 
"65d542a20af4aafc88716626", + "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "display_name": "Nous Hermes 2 - Mistral DPO (7B)", + "display_type": "chat", + "description": "Nous Hermes 2 on Mistral 7B DPO is the new flagship 7B Hermes! This model was DPO'd from Teknium/OpenHermes-2.5-Mistral-7B and has improved across the board on all benchmarks tested - AGIEval, BigBench Reasoning, GPT4All, and TruthfulQA.", + "license": "apache-2.0", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mistral-7B-DPO", + "creator_organization": "NousResearch", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>"], + "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-21T00:24:02.387Z", + "update_at": "2024-02-21T00:24:02.387Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xEFa73cF1A2DD2Be31888913c57bf569cA27ce9E6": 1 }, + "asks_updated": "2024-05-11T05:55:30.322194054Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 366.2, + "throughput_out": 20.266666666666666, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.07326007326007326, + "qps": 0.13333333333333333, + "throughput_in": 366.2, + "throughput_out": 20.266666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a4b298fbc8405400423169", + "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "display_name": "Nous Hermes 2 - Mixtral 8x7B-DPO ", + "display_type": "chat", + "description": "Nous Hermes 2 Mixtral 7bx8 DPO is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. 
The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "creator_organization": "NousResearch", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2024-01-15T04:20:40.079Z", + "update_at": "2024-04-12T18:35:56.478Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [ + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5a", "cluster": "wrigleycub" } + ], + "isFinetuned": false, + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x17B96a27Dd71A9C4687441c14d1feCA207D0D3d4": 1, + "0x1812939B682B119d362412811237da09D9bc6c8D": 1, + "0xde2F311932B19E8Aa2069302FA701f6d0fA1B574": 1 + }, + "asks_updated": "2024-05-11T00:30:10.175648127Z", + "gpus": { "": 0 }, + "qps": 0.9333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 887.2, + "throughput_out": 13.866666666666667, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.03333333333333333, + "qps": 0.2, + "throughput_in": 301.06666666666666, + "throughput_out": 3.7333333333333334, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.07142857142857142, + "qps": 0.2, + "throughput_in": 173.66666666666666, + "throughput_out": 2.4, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.08333333333333333, + "qps": 0.5333333333333333, + "throughput_in": 412.46666666666664, + "throughput_out": 7.733333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a4466efbc8405400423166", + "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "display_name": "Nous Hermes 2 - Mixtral 8x7B-SFT", + "display_type": "chat", + "description": "Nous Hermes 2 Mixtral 7bx8 SFT is the new flagship Nous Research model trained over the Mixtral 7bx8 MoE LLM. 
The model was trained on over 1,000,000 entries of primarily GPT-4 generated data, as well as other high quality data from open datasets across the AI landscape, achieving state of the art performance on a variety of tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", + "creator_organization": "NousResearch", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2024-01-14T20:39:10.060Z", + "update_at": "2024-01-14T20:39:10.060Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isFinetuned": false, + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3805a418c9af7eA4a88C6BC519ba95223EFe87F7": 1 }, + "asks_updated": "2024-05-10T17:07:56.753575198Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "658c8dad27fb98d2edc447ff", + "name": "NousResearch/Nous-Hermes-2-Yi-34B", + "display_name": "Nous Hermes-2 Yi (34B)", + "display_type": "chat", + "description": "Nous Hermes 2 - Yi-34B is a state of the art Yi Fine-tune", + "license": "apache-2", + "creator_organization": "NousResearch", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 34000000000, + "release_date": "2023-12-27T20:48:45.586Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": ["<|im_start|>", "<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 200, "output": 200 }, + "created_at": "2023-12-27T20:48:45.586Z", + "update_at": "2023-12-27T20:50:38.632Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1f58b29024eba2f33b3983733396b4eda0E6f976": 1 }, + "asks_updated": "2024-05-11T11:46:22.377796052Z", + "gpus": { "": 0 }, + "qps": 18.266666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3213.866666666667, + "throughput_out": 438.8, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.45881427809138686, + "qps": 18.266666666666666, + "throughput_in": 3213.866666666667, + "throughput_out": 438.8, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64cae18d3ede2fa7e2cbcc7d", + "name": "NousResearch/Nous-Hermes-Llama2-13b", + "display_name": "Nous Hermes Llama-2 
(13B)", + "display_type": "chat", + "description": "Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "license": "mit", + "creator_organization": "NousResearch", + "hardware_label": "2x A100 80GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "stop": ["###", ""], + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-08-02T23:06:53.926Z", + "update_at": "2023-10-07T00:19:33.779Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xfA6b8e3C0ac21BA89F8e75770251f0E4e509eF90": 1 }, + "asks_updated": "2024-05-10T17:59:32.616570629Z", + "gpus": { "": 0 }, + "qps": 1, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1430.2, + "throughput_out": 166.06666666666666, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0.336864406779661, + "qps": 1, + "throughput_in": 1430.2, + "throughput_out": 166.06666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf6", + "name": "NousResearch/Nous-Hermes-llama-2-7b", + "display_name": "Nous Hermes LLaMA-2 (7B)", + "display_type": "chat", + "description": "Nous-Hermes-Llama2-7b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/NousResearch/Nous-Hermes-llama-2-7b", + "creator_organization": "NousResearch", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 6738415616, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "stop": ["###", ""], + "add_generation_prompt": true, + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.403Z", + "update_at": "2023-10-24T17:41:52.365Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xf3AbD7152646995C204D8Bee0699AC58653De524": 1 }, + "asks_updated": "2024-05-10T16:28:20.007677485Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.06666666666666667, + "qps": 0, + "throughput_in": 0, + 
"throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf5", + "name": "Open-Orca/Mistral-7B-OpenOrca", + "display_name": "OpenOrca Mistral (7B) 8K", + "display_type": "chat", + "description": "An OpenOrca dataset fine-tune on top of Mistral 7B by the OpenOrca team.", + "license": "apache-2.0", + "link": "https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca", + "creator_organization": "OpenOrca", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 7241748480, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.403Z", + "update_at": "2023-10-24T00:01:52.541Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x802be1ae9dC8F68c43a47ec3d2070F8f1B0553E8": 1 }, + "asks_updated": "2024-05-11T11:46:47.152201508Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.1111111111111111, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5cb", + "name": "Phind/Phind-CodeLlama-34B-v2", + "display_name": "Phind Code LLaMA v2 (34B)", + "display_type": "code", + "description": "Phind-CodeLlama-34B-v1 trained on additional 1.5B tokens high-quality programming-related data proficient in Python, C/C++, TypeScript, Java, and more.", + "license": "llama2", + "creator_organization": "Phind", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 33743970304, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "### System Prompt\nYou are an intelligent programming assistant.\n\n### User Message\n{prompt}n\n### Assistant\n", + "stop": [""], + "chat_template": "{{ '### System Prompt\nYou are an intelligent programming assistant.\n\n' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '### User Message\n' + message['content'] + '\n' }}{% else %}{{ '### Assistant\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant\n' }}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "testytiger" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xE3b9434A627d4E042a82A4E04375E7B14D9a2866": 1 }, + "asks_updated": "2024-05-10T13:54:50.844650373Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "testytiger", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + 
"error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c81b4975e79f24d98b50", + "name": "Qwen/Qwen1.5-0.5B-Chat", + "display_name": "Qwen 1.5 Chat (0.5B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 500000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:35:55.571Z", + "update_at": "2024-02-05T11:35:55.571Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x69d786B0E491C02c3053287F7FD4aa684A0f86B9": 1 }, + "asks_updated": "2024-05-10T14:34:01.502238784Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8164975e79f24d98b4f", + "name": "Qwen/Qwen1.5-0.5B", + "display_name": "Qwen 1.5 (0.5B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 500000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:35:50.032Z", + "update_at": "2024-02-05T11:35:50.032Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xa01d67F2450E0e7ACBfb7dc8B1a0A3205C5C8310": 1 }, + "asks_updated": "2024-05-11T00:20:07.81838798Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8284975e79f24d98b52", + "name": "Qwen/Qwen1.5-1.8B-Chat", + "display_name": "Qwen 1.5 Chat (1.8B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 1800000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:08.609Z", + "update_at": "2024-02-05T11:36:08.609Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x332b426661a850784BAcFd12B9E7D9b51397B1ec": 1 }, + "asks_updated": "2024-05-10T19:50:02.900326326Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.16666666666666666, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8214975e79f24d98b51", + "name": 
"Qwen/Qwen1.5-1.8B", + "display_name": "Qwen 1.5 (1.8B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-1.8B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 1800000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:01.895Z", + "update_at": "2024-02-05T11:36:01.895Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xE1E3e79fC7e677c1Bdb8E6f6B6dde0B5d78C2ABc": 1 }, + "asks_updated": "2024-05-10T13:22:12.143866414Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.16666666666666666, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "663929111a16009453d858d6", + "name": "Qwen/Qwen1.5-110B-Chat", + "display_name": "Qwen 1.5 Chat (110B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-110B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 110000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "owner_userid": null, + "config": { + "stop": ["<|im_end|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 450, "output": 450, "hourly": 0 }, + "created_at": "2024-05-06T19:01:37.206Z", + "update_at": "2024-05-06T19:01:37.206Z", + "instances": [{ "avzone": "us-south-1a", "cluster": "mustymarfa" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1bfE8838c1A5fA63cc1120e2de1Bce2599FDd946": 1 }, + "asks_updated": "2024-05-11T09:12:31.886283279Z", + "gpus": { "": 0 }, + "qps": 0.26666666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 143.4, + "throughput_out": 42.6, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.0476310802274163, + "qps": 0.26666666666666666, + "throughput_in": 143.4, + "throughput_out": 42.6, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c84d4975e79f24d98b58", + "name": "Qwen/Qwen1.5-14B-Chat", + "display_name": "Qwen 1.5 Chat (14B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-14B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 14000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2024-02-05T11:36:45.529Z", + "update_at": "2024-02-05T11:36:45.529Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x13E78CCaCAc01069EF5a5505aa288eC3bb835eF3": 1 }, + "asks_updated": "2024-05-10T18:51:22.462254434Z", + "gpus": { "": 0 }, + "qps": 0.4, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 193.06666666666666, + "throughput_out": 136.2, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.4341556171423196, + "qps": 0.4, + "throughput_in": 193.06666666666666, + "throughput_out": 136.2, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8474975e79f24d98b57", + "name": "Qwen/Qwen1.5-14B", + "display_name": "Qwen 1.5 (14B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-14B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 14000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2024-02-05T11:36:39.431Z", + "update_at": "2024-02-05T11:36:39.431Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x473F3790526C64D89f0d1598C022bE36492D3051": 1 }, + "asks_updated": "2024-05-10T18:51:30.246170129Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "660c48d16184ee782ae490f0", + "name": "Qwen/Qwen1.5-32B-Chat", + "display_name": "Qwen 1.5 Chat (32B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 32000000000, + "show_in_playground": "true", + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2024-04-02T17:23:42.826Z", + "update_at": "2024-04-05T15:40:08.892Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xA47D7a9012B0e335809310AAc55497D50a855a3F": 1 }, + "asks_updated": "2024-05-11T05:55:35.551622457Z", + "gpus": { "": 0 }, + "qps": 0.26666666666666666, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 64.66666666666667, + "throughput_out": 124.26666666666667, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.09569027819707074, + "qps": 0.26666666666666666, + "throughput_in": 64.66666666666667, + "throughput_out": 124.26666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": 
{ "appearsIn": [], "order": 0 }, + "_id": "660c40783cd92bc225de4b41", + "name": "Qwen/Qwen1.5-32B", + "display_name": "Qwen 1.5 (32B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 32000000000, + "show_in_playground": "true", + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2024-04-02T17:23:42.826Z", + "update_at": "2024-04-05T15:40:15.875Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xc0a1c6F29F6a40fAC5fedd7Bb1723c7bf566785A": 1 }, + "asks_updated": "2024-05-10T19:27:02.10899998Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8344975e79f24d98b54", + "name": "Qwen/Qwen1.5-4B-Chat", + "display_name": "Qwen 1.5 Chat (4B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-4B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 4000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:20.314Z", + "update_at": "2024-02-05T11:36:20.314Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x32F674C484700968dEC9fe5D93C995179FAD2EE3": 1, + "0x56cab5C68705D192eA47A8Cf114c3904eC75c52E": 1, + "0x83783b52657B34c3e0C2938296009d398954dB26": 1, + "0xe34ba24c85fADb5E7fB2dBA0f292C9d25fF2B499": 1 + }, + "asks_updated": "2024-05-11T12:37:55.98626009Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c82e4975e79f24d98b53", + "name": "Qwen/Qwen1.5-4B", + "display_name": "Qwen 1.5 (4B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-4B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 4000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-05T11:36:14.800Z", + "update_at": "2024-02-05T11:36:14.800Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x2cf9F631373B30D4E27961Ac0D58799Fa32D30dc": 1 }, + "asks_updated": "2024-05-10T16:47:58.648213115Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c85a4975e79f24d98b5a", + "name": "Qwen/Qwen1.5-72B-Chat", + "display_name": "Qwen 1.5 Chat (72B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-72B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 72000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-02-05T11:36:58.193Z", + "update_at": "2024-04-17T19:23:06.511Z", + "instances": [ + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-south-1a", "cluster": "mustymarfa" } + ], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x9b2ef3e00dba4a0949B037095AA8F4FC97aB76Ea": 1, + "0xCE288A4aAf0EBc35C602441F03F09139993994A6": 1, + "0xcC4AB060c2cbe72ad1466eedE837Fb3Ca7015120": 1 + }, + "asks_updated": "2024-05-11T12:20:13.616737256Z", + "gpus": { "": 0 }, + "qps": 0.4666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 770.0666666666667, + "throughput_out": 126.4, + "stats": [ + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 
0.043187871337840605, + "qps": 0.4, + "throughput_in": 732, + "throughput_out": 124.4, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0, + "qps": 0.06666666666666667, + "throughput_in": 38.06666666666667, + "throughput_out": 2, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8544975e79f24d98b59", + "name": "Qwen/Qwen1.5-72B", + "display_name": "Qwen 1.5 (72B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-72B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 72000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": {}, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-02-05T11:36:52.008Z", + "update_at": "2024-02-05T11:36:52.008Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x37A5f0f9744F5bC79Da7908E1b70C10502C4b4cf": 1 }, + "asks_updated": "2024-05-10T18:50:03.489164666Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.3333333333333333, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c8404975e79f24d98b56", + "name": "Qwen/Qwen1.5-7B-Chat", + "display_name": "Qwen 1.5 Chat (7B)", + "display_type": "chat", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-05T11:36:32.804Z", + "update_at": "2024-02-05T11:36:32.804Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1D0455b2E77572f9584b859f1463114BD4D4EFDE": 1 }, + "asks_updated": "2024-05-11T01:45:17.557563997Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c0c83a4975e79f24d98b55", + "name": "Qwen/Qwen1.5-7B", + "display_name": "Qwen 1.5 (7B)", + "display_type": "language", + "description": "Qwen1.5 is the beta version of Qwen2, a transformer-based decoder-only language model pretrained on a large amount of data. 
In comparison with the previous released Qwen.", + "license": "tongyi-qianwen-research", + "link": "https://huggingface.co/Qwen/Qwen1.5-7B", + "creator_organization": "Qwen", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": {}, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-05T11:36:26.420Z", + "update_at": "2024-02-05T11:36:26.420Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x2ccdcdEf417d5d6D2EeD95dF48f1fcc8Ec1085b2": 1 }, + "asks_updated": "2024-05-11T05:55:32.170734058Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.1, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acee11227f790586239d36", + "name": "SG161222/Realistic_Vision_V3.0_VAE", + "display_name": "Realistic Vision 3.0", + "display_type": "image", + "description": "Fine-tune version of Stable Diffusion focused on photorealism.", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/SG161222/Realistic_Vision_V1.4", + "creator_organization": "SG161222", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { "height": 1024, "width": 1024, "steps": 20, "number_of_images": 2, "seed": 42 }, + "created_at": "2023-07-11T05:52:17.219Z", + "update_at": "2023-07-11T05:52:17.219Z", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x1E128f472069E38aEF6B8f25147B42EF81f0F3C0": 1 }, + "asks_updated": "2024-05-11T02:40:41.799352496Z", + "gpus": { "NVIDIA A40": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.0429948, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3.8357315 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "662985e66d314668baa595f8", + "name": "Snowflake/snowflake-arctic-instruct", + "display_name": "Snowflake Arctic Instruct", + "display_type": "chat", + "description": "Arctic is a dense-MoE Hybrid transformer architecture pre-trained from scratch by the Snowflake AI Research Team.", + "license": "Apache-2.0", + "link": "https://huggingface.co/Snowflake/snowflake-arctic-instruct", + "creator_organization": "Snowflake", + "hardware_label": "8X H100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "480000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "chat_template_name": "default", + "stop": ["<|im_start|>", "<|im_end|>"] + }, + "pricing": { "input": 600, "output": 600, "hourly": 0 }, + "update_at": "2024-05-07T05:05:41.946Z", + "instances": [{ "avzone": "us-central-6a", "cluster": 
"mirthfulproxy2" }], + "engine": "vllm", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x7F8D3B29224f2a7f2c88118B67815AdCf3E2228d": 1 }, + "asks_updated": "2024-05-10T14:43:17.112345066Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0.86, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "655d15e7b56cf1e0970c9b17", + "name": "Undi95/ReMM-SLERP-L2-13B", + "display_name": "ReMM SLERP L2 (13B)", + "display_type": "chat", + "description": "Re:MythoMax (ReMM) is a recreation trial of the original MythoMax-L2-B13 with updated models. This merge use SLERP [TESTING] to merge ReML and Huginn v1.2.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/Undi95/ReMM-SLERP-L2-13B", + "creator_organization": "Undi95", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST]\n {prompt} \n[/INST]\n\n", + "stop": ["[INST]", "\n\n"], + "chat_template_name": "llama", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-11-21T20:41:11.759Z", + "update_at": "2023-11-21T20:41:11.759Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x07c96Eeb1Bb52ae6FB40543f6188912775F35d52": 1 }, + "asks_updated": "2024-05-10T17:32:28.22917725Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 62.46666666666667, + "throughput_out": 0.6666666666666666, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.13333333333333333, + "qps": 0.06666666666666667, + "throughput_in": 62.46666666666667, + "throughput_out": 0.6666666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "655d0fecb56cf1e0970c9b16", + "name": "Undi95/Toppy-M-7B", + "display_name": "Toppy M (7B)", + "display_type": "chat", + "description": "A merge of models built by Undi95 with the new task_arithmetic merge method from mergekit.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/Undi95/Toppy-M-7B", + "creator_organization": "Undi95", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 7241748480, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["###"], + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n\n' }}{% endif %}{% endfor %}{{ '### Response:' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-11-21T20:15:40.468Z", + "update_at": "2023-11-21T20:15:40.468Z", + "instances": [{ 
"avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x80bd2D4302331454187F9EdA8b88e99d6E4A6c9b": 1 }, + "asks_updated": "2024-05-11T07:32:00.722382147Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 138.46666666666667, + "throughput_out": 8.133333333333333, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.1111111111111111, + "qps": 0.13333333333333333, + "throughput_in": 138.46666666666667, + "throughput_out": 8.133333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "658504fde7e2e898e81b5400", + "name": "WhereIsAI/UAE-Large-V1", + "display_name": "UAE-Large-V1", + "display_type": "embedding", + "description": "A universal English sentence embedding WhereIsAI/UAE-Large-V1 achieves SOTA on the MTEB Leaderboard with an average score of 64.64!", + "license": "apache-2.0", + "link": "https://huggingface.co/bert-base-uncased", + "creator_organization": "WhereIsAI", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 330000000, + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 4, "output": 4, "finetune": 0, "base": 0 }, + "created_at": "2023-12-22T03:39:41.105Z", + "update_at": "2023-12-22T03:45:34.219Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isFinetuned": false, + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x97E9EAE94B8498A57f4F9033A32d722323C294C8": 1, + "0xb8Bfb7F25770CfF8bf88ddF1D29237f1D5604d96": 1 + }, + "asks_updated": "2024-05-11T03:02:55.096371076Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5cd", + "name": "WizardLM/WizardCoder-15B-V1.0", + "display_name": "WizardCoder v1.0 (15B)", + "display_type": "code", + "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.", + "license": "llama2", + "creator_organization": "WizardLM", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 15517462528, + "show_in_playground": true, + "context_length": 8192, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:\n", + "stop": ["###", "<|endoftext|>"], + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 
0, + "num_running": 0, + "asks": { + "0x4C51aff4170724263bF75af64CE9E2e8F6079fA9": 1, + "0xb4CdE622719696fd930e92FB5bBfC3eA3176D2Fd": 1 + }, + "asks_updated": "2024-05-11T02:06:56.287724569Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f672e8bc372ce719b97f02", + "name": "WizardLM/WizardCoder-Python-34B-V1.0", + "display_name": "WizardCoder Python v1.0 (34B)", + "display_type": "code", + "description": "This model empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.", + "license": "llama2", + "creator_organization": "WizardLM", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["", "###"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-09-05T00:14:32.365Z", + "update_at": "2023-09-05T00:14:32.365Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xAC3abeabCb3Ef089becEA8b551a4e998AD8dDF30": 1 }, + "asks_updated": "2024-05-11T09:54:27.918691661Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6567d4e5d1c5e59967640530", + "name": "WizardLM/WizardLM-13B-V1.2", + "display_name": "WizardLM v1.2 (13B)", + "display_type": "chat", + "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities", + "license": "llama2", + "creator_organization": "WizardLM", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 13000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": ["", "USER:", "ASSISTANT:"], + "prompt_format": "USER: {prompt} ASSISTANT:", + "add_generation_prompt": true, + "chat_template_name": "llama", + "pre_prompt": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. 
" + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-11-30T00:18:45.791Z", + "update_at": "2023-11-30T01:20:01.779Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xF9d994b8D62c40bA7532917955dc49D4712C6Ec0": 1 }, + "asks_updated": "2024-05-10T14:31:00.559469906Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.09090909090909091, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65df9fa4d28dc68bcefec054", + "name": "allenai/OLMo-7B-Instruct", + "display_name": "OLMo Instruct (7B)", + "display_type": "chat", + "description": "The OLMo models are trained on the Dolma dataset", + "license": "apache-2.0", + "link": "https://huggingface.co/allenai/OLMo-7B-Instruct", + "creator_organization": "AllenAI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "eos_token": "<|endoftext|>", + "prompt_format": "<|user|>\n{prompt}\n<|assistant|>", + "stop": ["<|endoftext|>"], + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-28T21:03:32.038Z", + "update_at": "2024-02-28T21:03:32.038Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xD29D5B02918F962505749Ace7d67AB3E2acAbc67": 1 }, + "asks_updated": "2024-05-11T02:57:58.795395564Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.0625, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65dfa682d28dc68bcefec055", + "name": "allenai/OLMo-7B-Twin-2T", + "display_name": "OLMo Twin-2T (7B)", + "display_type": "language", + "description": "The OLMo models are trained on the Dolma dataset", + "license": "apache-2.0", + "link": "https://huggingface.co/allenai/OLMo-7B-Twin-2T", + "creator_organization": "AllenAI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-28T21:32:50.812Z", + "update_at": "2024-02-28T21:32:50.812Z", + "instances": 
[{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3f2D5E8E2C72C0A63A478da9774d8C2F1F4E5c55": 1 }, + "asks_updated": "2024-05-11T03:07:59.684414475Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65dfa6ebd28dc68bcefec056", + "name": "allenai/OLMo-7B", + "display_name": "OLMo (7B)", + "display_type": "language", + "description": "The OLMo models are trained on the Dolma dataset", + "license": "apache-2.0", + "link": "https://huggingface.co/allenai/OLMo-7B", + "creator_organization": "AllenAI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-28T21:34:35.444Z", + "update_at": "2024-02-28T21:34:35.444Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xfC0C60D66A62b2A87f96B3318500e876F1B1e367": 1 }, + "asks_updated": "2024-05-11T02:56:32.514653629Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.07142857142857142, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6598bc0201bf780326e7eac8", + "name": "bert-base-uncased", + "display_name": "Bert Base Uncased", + "display_type": "embedding", + "description": "original BERT model", + "license": "Apache-2", + "creator_organization": "Google", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 46550608, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2024-01-06T02:33:38.323Z", + "update_at": "2024-01-06T02:33:38.323Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x0b7eae8cCeb3D67b02A97ac2D1100E29E6991EB9": 1, + "0x21558AA2fCc15eF003135a4108a0884d4A3054f2": 1, + "0x2fb2cf26D55c96dc0BAad5f088b0e5Bf0FDe565B": 1, + "0x5857eaB3609A074E402972C3DDDE8957ea4E7dC5": 1, + "0xB49Bf891cBeba9F3e5045acbD9CD7C3fD932A543": 1, + "0xC412E22A5B1CE26b65B80f2217b9419369057714": 1 + }, + "asks_updated": "2024-05-11T03:13:31.258661006Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, 
"input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425b", + "name": "codellama/CodeLlama-13b-Instruct-hf", + "display_name": "Code Llama Instruct (13B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "add_generation_prompt": true, + "stop": ["", "[INST]"], + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-04T05:01:42.539Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x085bF8877517A750f62641F8FE4C5a2D6b26e899": 1, + "0x934A45b707cbe77453d7d14F4d84F31CaF8adc6F": 1, + "0xA6c2278710AC89440e150857521e67572D52f303": 1 + }, + "asks_updated": "2024-05-11T05:55:30.372800468Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425a", + "name": "codellama/CodeLlama-13b-Python-hf", + "display_name": "Code Llama Python (13B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-20T22:52:59.177Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x1F6868Df357950c3F6E5804a60d146A883f1fC7b": 1, + "0x6b3EbBfa6c3DFDa17dD19c35557A9F3bAdD55583": 1, + "0x8eb3F32C3999eaD4867f54ABE0098a0bFE9e2f23": 1, + "0xA405565bdBf98e1aFd8CcBEdc028F0546c41eB47": 1, + "0xBd7eC5bF0b33b56c916A4b2deB99A37025837d9a": 1, + "0xc710087956F114639A3726cb6d4302B125822574": 1 + }, + "asks_updated": "2024-05-11T12:35:27.588482466Z", + "gpus": { "": 0 }, + "qps": 0.4, + 
"permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 201.46666666666667, + "throughput_out": 142.73333333333332, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0.4, + "throughput_in": 201.46666666666667, + "throughput_out": 142.73333333333332, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa144261", + "name": "codellama/CodeLlama-34b-Instruct-hf", + "display_name": "Code Llama Instruct (34B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "add_generation_prompt": true, + "stop": ["", "[INST]"], + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}" + }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xF5546B0d0414AFfc8ee2Dc36D61EcAF3a2ec65F5": 1 }, + "asks_updated": "2024-05-10T17:12:16.470434811Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 93.6, + "throughput_out": 14.8, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.058823529411764705, + "qps": 0.06666666666666667, + "throughput_in": 93.6, + "throughput_out": 14.8, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa144260", + "name": "codellama/CodeLlama-34b-Python-hf", + "display_name": "Code Llama Python (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xe09fF3EE0889C8F5c9e434E8AF523649805E34e1": 1 }, + "asks_updated": 
"2024-05-10T17:08:19.24073671Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b6f505752a299002ee4dc9", + "name": "codellama/CodeLlama-70b-Instruct-hf", + "display_name": "Code Llama Instruct (70B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/codellama/CodeLlama-70b-Instruct-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "70000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "chat_template": "{{ bos_token + ' ' }}{% for message in messages %}{{'Source: ' + message['role'].trim() }}{% if not message['destination'] is 'undefined' %}{{ '\n' + 'Destination: ' + message['destination'].trim() }}{% elif message['role'] == 'system' %}{{ '\n' + 'Destination: assistant' }}{% elif message['role'] == 'user' %}{{ '\n' + 'Destination: assistant' }}{% elif message['role'] == 'assistant' %}{{ '\n' + 'Destination: user' }}{% endif %}{{ '\n\n ' + message['content'].trim() + '' + ' '}}{% endfor %}{% if add_generation_prompt %}{{ 'Source: assistant' + '\n' }}{{ 'Destination: user' + '\n\n' + ' ' }}{% endif %}", + "bos_token": "", + "step_id": "", + "stop": [""], + "add_generation_prompt": true + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-01-29T00:44:53.513Z", + "update_at": "2024-01-29T00:44:53.513Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xa2663C264Db2177E3Ae3Ea643152B2b9b1f1dA6c": 1 }, + "asks_updated": "2024-05-11T05:55:32.068494589Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.010869565217391304, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b6f4ba752a299002ee4dc7", + "name": "codellama/CodeLlama-70b-Python-hf", + "display_name": "Code Llama Python (70B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/codellama/CodeLlama-70b-Python-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "70000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-01-29T00:43:38.396Z", + 
"update_at": "2024-01-29T00:43:38.396Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xf2a7de1a0E1dC83DC5B1f1dE8783dFEc67be8910": 1 }, + "asks_updated": "2024-05-10T18:49:47.188860922Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b6f4d4752a299002ee4dc8", + "name": "codellama/CodeLlama-70b-hf", + "display_name": "Code Llama (70B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "apache-2.0", + "link": "https://huggingface.co/codellama/CodeLlama-70b-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "70000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""] }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-01-29T00:44:04.149Z", + "update_at": "2024-01-29T00:44:04.149Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 5, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x39E9cFF7e3169839d6D0Cd8262E618c2AaaA3625": 1, + "0x4e5281dB0A47701fb2Bb5669175C2A51d5e06496": 1, + "0x8310bA7683A57Ce0d2A431FC53b74FCc1bF2cF94": 1, + "0xd3C0F1d52BeBdB36d105AFB149b16fbb41a30810": 1, + "0xe8eC4B8AA74A5Fc2FC3A961F97fE6dE030f979b1": 1 + }, + "asks_updated": "2024-05-11T05:55:39.590729933Z", + "gpus": { "": 0 }, + "qps": 0.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 621.7333333333333, + "throughput_out": 48.666666666666664, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 1.21875, + "qps": 0.6, + "throughput_in": 621.7333333333333, + "throughput_out": 48.666666666666664, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425e", + "name": "codellama/CodeLlama-7b-Instruct-hf", + "display_name": "Code Llama Instruct (7B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": 
"2023-08-24T17:08:25.379Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x6Af33E97914777b841B6f7a11332bF142301403F": 1 }, + "asks_updated": "2024-05-11T11:22:28.25491246Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.25, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425d", + "name": "codellama/CodeLlama-7b-Python-hf", + "display_name": "Code Llama Python (7B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "LLAMA 2 Community license Agreement (Meta)", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x68494584Fc039C3698b8E02eDe63d45DE596DE17": 1, + "0xcd079137d79089474d4d876109cECE8bb33E7Ef9": 1 + }, + "asks_updated": "2024-05-11T12:05:29.893871627Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "660899dc98fbb3f86d028781", + "name": "cognitivecomputations/dolphin-2.5-mixtral-8x7b", + "display_name": "Dolphin 2.5 Mixtral 8x7b", + "display_type": "chat", + "description": "This Dolphin is really good at coding, I trained with a lot of coding data. 
It is very obedient but it is not DPO tuned - so you still might need to encourage it in the system prompt as I show in the below examples.", + "license": "apache-2.0", + "link": "https://huggingface.co/cognitivecomputations/dolphin-2.5-mixtral-8x7b", + "creator_organization": "cognitivecomputations", + "hardware_label": "8x H100 80GB SXM", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 56000000000, + "release_date": "2023-08-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "pre_prompt": "<|im_start|>system\nYou are Dolphin, an uncensored and unbiased AI assistant.<|im_end|>\n", + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2023-09-05T19:58:25.683Z", + "update_at": "2023-09-05T19:58:25.683Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isDedicatedInstance": false, + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xC247D4e301f7bB0ecf719577741bF912919332e6": 1 }, + "asks_updated": "2024-05-10T19:00:59.773522401Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 96.2, + "throughput_out": 10.266666666666667, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0.06666666666666667, + "throughput_in": 96.2, + "throughput_out": 10.266666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "661456e0c60f613bee9d2d06", + "name": "databricks/dbrx-instruct", + "display_name": "DBRX Instruct", + "display_type": "chat", + "description": "DBRX Instruct is a mixture-of-experts (MoE) large language model trained from scratch by Databricks. 
DBRX Instruct specializes in few-turn interactions.", + "license": "Databricks Open Model License", + "link": "https://huggingface.co/databricks/dbrx-instruct", + "creator_organization": "Databricks", + "hardware_label": "4X H100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "132000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "add_generation_prompt": true, + "chat_template_name": "default", + "stop": ["<|im_start|>", "<|im_end|>"] + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "instances": [{ "avzone": "us-central-6a", "cluster": "mirthfulproxy2" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xEf707f83DC8C7BA4C1b1D289C3380dF993A3E507": 1 }, + "asks_updated": "2024-05-10T15:35:58.638900703Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65c3137e4975e79f24d98b5c", + "name": "deepseek-ai/deepseek-coder-33b-instruct", + "display_name": "Deepseek Coder Instruct (33B)", + "display_type": "chat", + "description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.", + "license": "deepseek", + "link": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct", + "creator_organization": "DeepSeek", + "pricing_tier": "Featured", + "num_parameters": 33000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 16384, + "config": { + "prompt_format": "", + "stop": ["<|EOT|>", "<|begin▁of▁sentence|>", "<|end▁of▁sentence|>"], + "bos_token": "<|begin▁of▁sentence|>", + "add_generation_prompt": true, + "chat_template": "{{'<|begin▁of▁sentence|>'}}{%- for message in messages %}{%- if message['role'] == 'system' %}{{ message['content'] }}{%- else %}{%- if message['role'] == 'user' %}{{'### Instruction:\\n' + message['content'] + '\\n'}}{%- else %}{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}{%- endif %}{%- endif %}{%- endfor %}{% if add_generation_prompt %}{{'### Response:'}}{% endif %}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2024-02-07T05:22:06.809Z", + "update_at": "2024-02-07T05:22:06.809Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x25DEF2A3bBB026031AB7eED0439aC90cb9269E2D": 1, + "0x794F82c9417C98e5B30A14165481686E8e94251f": 1, + "0xA6091E5e79d33269023eC3413e1a4bD94870685C": 1 + }, + "asks_updated": "2024-05-10T21:27:01.421440752Z", + "gpus": { "": 0 }, + "qps": 2.4, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 2281.866666666667, + "throughput_out": 535.0666666666667, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.49519890260630994, + "qps": 2.4, + "throughput_in": 
2281.866666666667, + "throughput_out": 535.0666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "660c58976184ee782ae490f1", + "name": "deepseek-ai/deepseek-llm-67b-chat", + "display_name": "DeepSeek LLM Chat (67B)", + "display_type": "chat", + "description": "trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese", + "license": "deepseek", + "link": "https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat", + "creator_organization": "DeepSeek", + "pricing_tier": "", + "num_parameters": 67000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "owner_userid": "", + "config": { + "prompt_format": "", + "stop": ["<|begin▁of▁sentence|>", "<|end▁of▁sentence|>"], + "bos_token": "<|begin▁of▁sentence|>", + "add_generation_prompt": true, + "chat_template": "{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %} {{ 'User: ' + message['content'] + '\n\n'}}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|end▁of▁sentence|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-04-02T19:12:23.328Z", + "update_at": "2024-04-02T19:12:23.328Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x9E9Eb2e3fD4122006fF73bE9Bb0aFF2572549326": 1 }, + "asks_updated": "2024-05-10T14:04:42.701320488Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 40.13333333333333, + "throughput_out": 31.866666666666667, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.125, + "qps": 0.13333333333333333, + "throughput_in": 40.13333333333333, + "throughput_out": 31.866666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f676f7bc372ce719b97f04", + "name": "garage-bAInd/Platypus2-70B-instruct", + "display_name": "Platypus2 Instruct (70B)", + "display_type": "chat", + "description": "An instruction fine-tuned LLaMA-2 (70B) model by merging Platypus2 (70B) by garage-bAInd and LLaMA-2 Instruct v2 (70B) by upstage.", + "license": "CC BY-NC-4.0", + "creator_organization": "garage-bAInd", + "hardware_label": "2x A100 80GB", + "pricing_tier": "featured", + "num_parameters": 70000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": ["", "###"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "add_generation_prompt": true, + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %} {{ '### Instruction:\n' + message['content'] + '\n' }}{% elif message['role'] == 'system' %}{{ '### System:\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% if loop.last %}{{ '### Response:\n' }}{% endif %}{% endfor %}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": 
"2023-09-05T00:31:51.264Z", + "update_at": "2023-09-07T01:46:29.338Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x32fA272f7D81963fc8EE3DCA70E28a00BB5f2617": 1 }, + "asks_updated": "2024-05-10T18:51:18.425217527Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.2333333333333333, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7e89e03b97802d3af0512", + "name": "google/gemma-2b-it", + "display_name": "Gemma Instruct (2B)", + "display_type": "chat", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-2b-it", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 2000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["", ""], + "chat_template": "{{ bos_token }}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{% for message in messages %}{{'' + role + '\n' + message['content'] + '' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>model\n' }}{% endif %}", + "bos_token": "" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-23T00:36:46.381Z", + "update_at": "2024-02-23T00:36:46.381Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x83ff7421906004DEa319FB2Dc5766F86f146973E": 1 }, + "asks_updated": "2024-05-11T03:02:55.608299306Z", + "gpus": { "": 0 }, + "qps": 0.8, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 55.733333333333334, + "throughput_out": 5.533333333333333, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0.0078125, + "qps": 0.8, + "throughput_in": 55.733333333333334, + "throughput_out": 5.533333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7e93203b97802d3af0513", + "name": "google/gemma-2b", + "display_name": "Gemma (2B)", + "display_type": "language", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-2b", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 2000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-02-23T00:39:14.772Z", + "update_at": "2024-02-23T00:39:14.772Z", + "instances": [{ "avzone": 
"us-central-2a", "cluster": "jollyllama" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x3B05b29E71860Ca416cEe96c7e793c36fc4Ce5Ff": 1, + "0x9CFcBB9434f86b6Ce544DB9880af29d188d9433f": 1 + }, + "asks_updated": "2024-05-11T03:12:00.872484732Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7ea3d03b97802d3af0515", + "name": "google/gemma-7b-it", + "display_name": "Gemma Instruct (7B)", + "display_type": "chat", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-7b-it", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["", ""], + "chat_template": "{{ bos_token }}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{% for message in messages %}{{'' + role + '\n' + message['content'] + '' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>model\n' }}{% endif %}", + "bos_token": "" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-23T00:43:41.936Z", + "update_at": "2024-02-23T00:43:41.936Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xa368a540D087220119290B897192743bFE379beE": 1 }, + "asks_updated": "2024-05-10T16:04:09.532225327Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65d7ea3b03b97802d3af0514", + "name": "google/gemma-7b", + "display_name": "Gemma (7B)", + "display_type": "language", + "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.", + "license": "gemma-terms-of-use", + "link": "https://huggingface.co/google/gemma-7b", + "creator_organization": "Google", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-02-23T00:43:39.642Z", + "update_at": "2024-02-23T00:43:39.642Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + 
"isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xbcD29dE615e898c76dc514D5DD7461CF0Be72245": 1 }, + "asks_updated": "2024-05-11T06:31:48.292488587Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f678e7bc372ce719b97f06", + "name": "lmsys/vicuna-13b-v1.5", + "display_name": "Vicuna v1.5 (13B)", + "display_type": "chat", + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "license": "llama2", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}", + "add_generation_prompt": true + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-05T00:40:07.763Z", + "update_at": "2023-09-05T00:40:07.763Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x8C25c0cAC3C50A94Fa1444a843BD3ab684640fc0": 1 }, + "asks_updated": "2024-05-11T12:20:14.699866977Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 142.53333333333333, + "throughput_out": 23.266666666666666, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.09090909090909091, + "qps": 0.06666666666666667, + "throughput_in": 142.53333333333333, + "throughput_out": 23.266666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "652da26579174a6bc507647f", + "name": "lmsys/vicuna-7b-v1.5", + "display_name": "Vicuna v1.5 (7B)", + "display_type": "chat", + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/lmsys/vicuna-7b-v1.5", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 6738415616, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["", "USER:"], + "add_generation_prompt": true, + "prompt_format": "USER: {prompt}\nASSISTANT: Hello!", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-16T20:51:49.194Z", + "update_at": "2023-10-16T20:51:49.194Z", + "instances": [{ "avzone": "us-east-2a", 
"cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x9d8f9Db61974247B3743b4492f24C424d6Ec9647": 1 }, + "asks_updated": "2024-05-11T12:20:55.695488101Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd9de620478cfa144258", + "name": "meta-llama/Llama-2-13b-chat-hf", + "display_name": "LLaMA-2 Chat (13B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "add_generation_prompt": true, + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:00:54.436Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x582Ee7216416721CF6101f0A37098C2741824E4B": 1 }, + "asks_updated": "2024-05-10T17:20:38.284283484Z", + "gpus": { "": 0 }, + "qps": 1.2, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 173.4, + "throughput_out": 90.66666666666667, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0.1351851851851851, + "qps": 1.2, + "throughput_in": 173.4, + "throughput_out": 90.66666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd03e620478cfa144255", + "name": "meta-llama/Llama-2-13b-hf", + "display_name": "LLaMA-2 (13B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-13b-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:07:52.318Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x4d41543337D4c322a31a0F9913af3C8708876249": 1 }, + "asks_updated": "2024-05-10T21:29:01.763024089Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd95e620478cfa144257", + "name": "meta-llama/Llama-2-70b-chat-hf", + "display_name": "LLaMA-2 Chat (70B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-70b-chat-hf", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "add_generation_prompt": true, + "chat_template_name": "llama" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2024-04-19T01:11:44.938Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x8E20042a6661ccC893087dE6B593f6F1998769dE": 1, + "0xcd4a3777cA2A18Fe8AebDc19A9411c799a8282DC": 1 + }, + "asks_updated": "2024-05-11T01:00:59.944127024Z", + "gpus": { "": 0 }, + "qps": 0.4666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 419.4, + "throughput_out": 118, + "error_rate": 44.2, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.15625, + "qps": 0.4666666666666667, + "throughput_in": 419.4, + "throughput_out": 118, + "error_rate": 44.2, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dd0ee620478cfa144256", + "name": "meta-llama/Llama-2-70b-hf", + "display_name": "LLaMA-2 (70B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-70b-hf", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xB7F462fEd161Ff92f48aaF2302C2a19fA01FdeB4": 1 }, + "asks_updated": "2024-05-11T01:01:02.822830948Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6dda7e620478cfa144259", + "name": "meta-llama/Llama-2-7b-chat-hf", + "display_name": "LLaMA-2 Chat (7B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "add_generation_prompt": true, + "chat_template_name": "llama" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "instances": [ + { "avzone": "us-central-1a", "cluster": "sassyseal" }, + { "avzone": "us-central-2a", "cluster": "jollyllama" }, + { "avzone": "us-east-2a", "cluster": "jumpyjackal" } + ], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x05655a9b3C902ceC9a13CfB61bc8f1FAfCdE7Aa8": 1, + "0x0c409751A39422fb09dbd0DB2EE0a2E69Bb29f40": 1, + "0x2701d6319108F711a8e435E3778340E359b8eaEd": 1 + }, + "asks_updated": "2024-05-11T03:05:24.933183344Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1, + "throughput_out": 0.6, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.05000000000000001, + "qps": 0.06666666666666667, + "throughput_in": 1, + "throughput_out": 
0.6, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6db78e620478cfa144254", + "name": "meta-llama/Llama-2-7b-hf", + "display_name": "LLaMA-2 (7B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", + "license": "LLaMA license Agreement (Meta)", + "link": "https://huggingface.co/meta-llama/Llama-2-7b-hf", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""] }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "instances": [{ "avzone": "us-west-1a", "cluster": "curiouscrow" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x5d723e1Ad99BDdd03b5C442fd5b487a86Dc42c48": 1 }, + "asks_updated": "2024-05-10T21:28:57.525357288Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-west-1a", + "cluster": "curiouscrow", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620daf44b2da307838b7cf1", + "name": "meta-llama/Llama-3-70b-chat-hf", + "display_name": "Meta Llama 3 70B Instruct", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. 
The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "num_parameters": 70000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-04-18T08:33:56.492Z", + "update_at": "2024-04-24T19:06:49.423Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [ + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-central-6a", "cluster": "mirthfulmonkey" } + ], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 20, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x005c07c763D13C42c16bcBF1843a188C91BeAFE4": 1, + "0x0Af678C6EBA809c95e3865762EC5A5ED93760CDc": 1, + "0x1b1121F2b72c2B4A0B7E510eCC54d6258A1d9F37": 1, + "0x1b4202742bb1c8f9d489adb4E2E3Fe632306FC81": 1, + "0x46b05D3f5674618344647C66538f94Ce85447310": 1, + "0x5E700261AE0a2b4F9D17C30558dfB000ddC55443": 1, + "0x5eD6330f693ee11865F36c8B2DE699C7A7D39a63": 1, + "0x680d04EFD3fA4E594AAEb5d0C04ce9F1090dFc3F": 1, + "0x72677CcC420192765c84cC20A6084F27f96Ee10D": 1, + "0x75B477C1E0F49DAc7D4be39C49Ec61BA67c376F2": 1, + "0x7D3BFcCbC57876224E5Bc1a212303B65bd370Ac5": 1, + "0x83dEe7D7B381E647511665f7AC2b2d70172B0E92": 1, + "0xA9B22801f41A96f4006d8c68B21207982d042201": 1, + "0xC5C7c6c638f24C9A60c5CA6DB4b373aa03209677": 1, + "0xEcc0D7cA34f421465Dc446C26A7614Dc38462693": 1, + "0xF573A972dA992d7a245B77eA3fB6c1B638f5Ff76": 1, + "0xFE26662735641dE91AEd95adb2c76C7DcF0Edc3c": 1, + "0xa191fbB16Af2E4A65714d20E5b1Eef3324DF3395": 1, + "0xbbB6B83dc920A8644EfB8e146d894AB64e96bA56": 1, + "0xc1d639C835f3c106DEE8e2c6591E6FC39ec0029D": 1, + "0xdcd96C00e09261DBab13B1F011DF7B0F5C1CeA92": 1, + "0xe7f47dD4666f588b91B1C7E605c4c7E2A08bbBD9": 1, + "0xeA0069EF1527e8Cd91C949f6965405B805852d16": 1 + }, + "asks_updated": "2024-05-11T12:26:15.222176169Z", + "gpus": { "": 0 }, + "qps": 12.4, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 11489.666666666666, + "throughput_out": 2889.3333333333335, + "error_rate": 0.06666666666666667, + "retry_rate": 0.13333333333333333, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.10318060911173708, + "qps": 6, + "throughput_in": 5823.4, + "throughput_out": 1358.7333333333333, + "error_rate": 0.06666666666666667, + "retry_rate": 0 + }, + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + 
"capacity": 0.11713261648745518, + "qps": 2.1333333333333333, + "throughput_in": 1762.7333333333333, + "throughput_out": 643.2, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.14247052101189547, + "qps": 2.7333333333333334, + "throughput_in": 2837, + "throughput_out": 670.0666666666667, + "error_rate": 0, + "retry_rate": 0.13333333333333333 + }, + { + "avzone": "us-central-6a", + "cluster": "mirthfulmonkey", + "capacity": 0.12173566741121672, + "qps": 1.5333333333333334, + "throughput_in": 1066.5333333333333, + "throughput_out": 217.33333333333334, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620b8bf4b2da307838b7cf0", + "name": "meta-llama/Llama-3-8b-chat-hf", + "display_name": "Meta Llama 3 8B Instruct", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T06:07:59.041Z", + "update_at": "2024-04-24T19:14:26.075Z", + "instances": [ + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-east-1a", "cluster": "happypiglet" } + ], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x51ED0439B7c9e057aCF5357ec96311b9Ad479069": 1, + "0xa463152B8228A26253897AE01A8C252692B48ae7": 1, + "0xc84A8565C896870EE5ab16688B240F4c7625F5Bb": 1 + }, + "asks_updated": "2024-05-10T16:26:56.562519239Z", + "gpus": { "": 0 }, + "qps": 7.466666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 6643.066666666667, + "throughput_out": 382.8666666666667, + "retry_rate": 3.0666666666666664, + "stats": [ + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.034627474728759836, + "qps": 4.8, + "throughput_in": 4375, + "throughput_out": 238.06666666666666, + "error_rate": 0, + "retry_rate": 1.6 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.0434983732441059, + "qps": 2.6666666666666665, + "throughput_in": 2268.0666666666666, + "throughput_out": 144.8, + "error_rate": 0, + "retry_rate": 
1.4666666666666666 + }, + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620db1d4b2da307838b7cf3", + "name": "meta-llama/Llama-3-8b-hf", + "display_name": "Meta Llama 3 8B", + "display_type": "language", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": null, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T08:34:37.676Z", + "update_at": "2024-04-18T09:12:37.169Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x050037E2D27C826eC7023868FD2A7bc3d3A76329": 1, + "0x74049692cFE55bA343E3a4AEA34Bc1Bef566683D": 1, + "0xF7C536aD4Bb0F90ED75eAEA7625FD5F16d590a47": 1 + }, + "asks_updated": "2024-05-11T06:51:42.031768854Z", + "gpus": { "": 0 }, + "qps": 8.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1792.0666666666666, + "throughput_out": 67.8, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.04535547483134403, + "qps": 8.6, + "throughput_in": 1792.0666666666666, + "throughput_out": 67.8, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "66215c615f70248d1cd89d9f", + "name": "meta-llama/LlamaGuard-2-8b", + "display_name": "Meta Llama Guard 2 8B", + "display_type": "language", + "description": null, + "license": "Llama-3 (Other)", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "num_parameters": 8000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": null, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T08:36:20.125Z", + "update_at": "2024-04-18T09:12:23.195Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x596711d7a0070782D77096054AFFFEe58A8Bd103": 1, + "0x711eAb4579879484478CE6b8cF03BfFc05C00352": 1, + "0xDD20d200A273BB014f5bA9E2f35911318e491Fc7": 1, + "0xd2492F50fC64e43df995B72310C4E6C66123A3eE": 1 + }, + "asks_updated": "2024-05-11T06:51:32.042258521Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + 
"cluster": "jumpyjackal", + "capacity": 0.06422764227642279, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620db134b2da307838b7cf2", + "name": "meta-llama/Meta-Llama-3-70B", + "display_name": "Meta Llama 3 70B", + "display_type": "language", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-70B", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 70000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "owner_userid": null, + "config": null, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2024-04-18T08:34:27.131Z", + "update_at": "2024-04-18T08:34:27.131Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x64fdF920b9FD14b19eE5E26722815b611A9969f6": 1 }, + "asks_updated": "2024-05-10T18:50:48.909389392Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.03333333333333333, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "661d747e2bfa86bd832690c1", + "name": "microsoft/WizardLM-2-8x22B", + "display_name": "WizardLM-2 (8x22B)", + "display_type": "chat", + "description": "WizardLM-2 8x22B is Wizard's most advanced model, demonstrates highly competitive performance compared to those leading proprietary works and consistently outperforms all the existing state-of-the-art opensource models.", + "license": "apache-2.0", + "link": "https://huggingface.co/microsoft/WizardLM-2-8x22B", + "creator_organization": "microsoft", + "pricing_tier": "Featured", + "num_parameters": 141000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 65536, + "owner_userid": null, + "config": { + "prompt_format": null, + "stop": [""], + "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ message['content'] + ' ' }}{% elif message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + eos_token + '\\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT: ' }}{% endif %}", + "add_generation_prompt": true, + "bos_token": "", + "eos_token": "" + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "created_at": "2024-04-15T18:39:58.959Z", + "update_at": "2024-04-15T18:39:58.959Z", + "instances": [ + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-east-2a", "cluster": "jumpyjackal" } + ], + "isPrivate": false, + "access_control": [], + 
"isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x2C8A1eFdc1D636f96e2Fea8B19E54B8a7aD021b2": 1, + "0xBd84E6F6Cf17E934BABcEa323b37fEe12b5B954c": 1, + "0xe82fd7645e8520bbB23989fda5d89B3014089d91": 1 + }, + "asks_updated": "2024-05-11T05:55:32.53293869Z", + "gpus": { "": 0 }, + "qps": 0.33333333333333337, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1390.4666666666667, + "throughput_out": 79.13333333333333, + "error_rate": 0.13333333333333333, + "retry_rate": 0.7333333333333334, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 1.6833333333333336, + "qps": 0.2, + "throughput_in": 1100.6666666666667, + "throughput_out": 38.333333333333336, + "error_rate": 0.06666666666666667, + "retry_rate": 0.4666666666666667 + }, + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 1.1199999999999999, + "qps": 0.13333333333333333, + "throughput_in": 289.8, + "throughput_out": 40.8, + "error_rate": 0.06666666666666667, + "retry_rate": 0.26666666666666666 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b40661251b2ff9f146d8ba", + "name": "microsoft/phi-2", + "display_name": "Microsoft Phi-2", + "display_type": "language", + "description": "Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value)", + "license": "mit", + "link": "https://huggingface.co/microsoft/phi-2", + "creator_organization": "Microsoft", + "pricing_tier": "Featured", + "num_parameters": 2700000000, + "release_date": "2024-01-26T19:22:09.533Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2024-01-26T19:22:09.533Z", + "update_at": "2024-01-26T19:23:46.072Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x87392e41b7E545004263879B84b6142a49C5fF49": 1 }, + "asks_updated": "2024-05-11T02:58:55.400693381Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 39.666666666666664, + "throughput_out": 96.86666666666666, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0.06666666666666667, + "throughput_in": 39.666666666666664, + "throughput_out": 96.86666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6514c873829715ded9cd17b1", + "name": "mistralai/Mistral-7B-Instruct-v0.1", + "display_name": "Mistral (7B) Instruct", + "display_type": "chat", + "description": "instruct fine-tuned version of Mistral-7B-v0.1", + "license": "Apache-2", + "creator_organization": "mistralai", + "hardware_label": "2x A100 80GB", + "num_parameters": 7241732096, + "release_date": "2023-09-27T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + 
"context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["[/INST]", ""], + "prompt_format": "[INST] {prompt} [/INST]", + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-09-28T00:27:31.815Z", + "update_at": "2023-10-12T01:13:51.840Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 5, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x2940b4a8aC66Ea56De5E30E1b8117b1A2840183C": 1, + "0x8Ec3cC96947C568792b81B26ed32588F74EEA967": 1, + "0x8fdd35098544851F45a1AF21dE3F715aBaE775D3": 1, + "0xD2af9bC18606b1851EC31D25A70634399eeFa07f": 1, + "0xd2A54B882E5b8157aFdbaf0002a046420b316773": 1 + }, + "asks_updated": "2024-05-11T11:39:13.802166171Z", + "gpus": { "": 0 }, + "qps": 7.6, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 1363.1333333333334, + "throughput_out": 543.4666666666667, + "retry_rate": 0.06666666666666667, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.1781045751633987, + "qps": 7.6, + "throughput_in": 1363.1333333333334, + "throughput_out": 543.4666666666667, + "error_rate": 0, + "retry_rate": 0.06666666666666667 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65776c7d6923087ddd5a660a", + "name": "mistralai/Mistral-7B-Instruct-v0.2", + "display_name": "Mistral (7B) Instruct v0.2", + "display_type": "chat", + "description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.", + "license": "apache-2.0", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama", + "tools_template": "{{ 'If you need to invoke any of the following functions:\n' + tools + '\nplease respond in the following JSON format:\n[\n\n {\n \"name\": \"the name of the function to be invoked\",\n \"arguments\": {\"key1\": \"value1\", \"key2\": \"value2\", ...}\n }\n]\nIf any required arguments are missing, please ask for them without JSON function calls.\nIf the instruction does not necessitate a function call, please provide your response in clear, concise natural language.\n\n' + message['content'] }}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-12-11T20:09:33.627Z", + "update_at": "2023-12-11T20:09:33.627Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "hardware_label": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xBfdCCF308cFc94E79C8E8547B98A908aEAE2378e": 1 }, + "asks_updated": "2024-05-11T11:45:09.032835704Z", + "gpus": { "": 0 }, + "qps": 1.6666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 
2454.866666666667, + "throughput_out": 136.66666666666666, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.18639455782312941, + "qps": 1.6666666666666667, + "throughput_in": 2454.866666666667, + "throughput_out": 136.66666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6514c6ee829715ded9cd17b0", + "name": "mistralai/Mistral-7B-v0.1", + "display_name": "Mistral (7B)", + "display_type": "language", + "description": "7.3B parameter model that outperforms Llama 2 13B on all benchmarks, approaches CodeLlama 7B performance on code, Uses Grouped-query attention (GQA) for faster inference and Sliding Window Attention (SWA) to handle longer sequences at smaller cost", + "license": "Apache-2", + "creator_organization": "mistralai", + "hardware_label": "2x A100 80GB", + "num_parameters": 7241732096, + "release_date": "2023-09-27T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { "stop": [""], "prompt_format": "{prompt}", "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-09-28T00:21:02.330Z", + "update_at": "2023-09-28T00:21:02.330Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x5a5E928538914B96C7EC31617cD026F8C92F7ad8": 1 }, + "asks_updated": "2024-05-10T15:42:13.775244857Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6620059786c156450dc1e445", + "name": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "display_name": "Mixtral-8x22B Instruct v0.1", + "display_type": "chat", + "description": "The Mixtral-8x22B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the Mixtral-8x22B-v0.1.", + "license": "apache-2.0", + "link": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "num_parameters": 141000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 65536, + "owner_userid": null, + "config": { + "stop": ["", "[/INST]"], + "chat_template": "{{bos_token}}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\n' + system_message + '\n<>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\n' + content + '\n<>\n\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content + ' ' + eos_token }}{% endif %}{% endfor %}", + "bos_token": "", + "eos_token": "" + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "created_at": 
"2024-04-17T17:23:35.226Z", + "update_at": "2024-05-03T01:20:25.932Z", + "instances": [ + { "avzone": "us-central-6a", "cluster": "mirthfulmonkey" }, + { "avzone": "eu-central-1a", "cluster": "merrymeerkat" }, + { "avzone": "us-south-1a", "cluster": "mustymarfa" } + ], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x0d23D3C623ed85942E3e521C45f6513161F7F97d": 1, + "0x1beCfdc13118Ff40C81de8eD43E375C87C95212D": 1, + "0x2da9932EA4909E6d2CDB6b8E551a690F6c054c77": 1, + "0xB2091d3D7166e8BA28a835AF2a2Ec4d71e774f8D": 1, + "0xF081B01E37A100ff8E1ef380C6D8Dd29098355D2": 1 + }, + "asks_updated": "2024-05-10T22:34:03.05610807Z", + "gpus": { "": 0 }, + "qps": 0.4666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 214.59999999999997, + "throughput_out": 62.33333333333333, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulmonkey", + "capacity": 0.125, + "qps": 0.2, + "throughput_in": 92.33333333333333, + "throughput_out": 26.4, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "eu-central-1a", + "cluster": "merrymeerkat", + "capacity": 0.16666666666666666, + "qps": 0.06666666666666667, + "throughput_in": 30.4, + "throughput_out": 8.866666666666667, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0, + "qps": 0.2, + "throughput_in": 91.86666666666666, + "throughput_out": 27.066666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "66165fa701f2f8a98997bf8e", + "name": "mistralai/Mixtral-8x22B", + "display_name": "Mixtral-8x22B", + "display_type": "language", + "description": "The Mixtral-8x22B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "license": "apache-2.0", + "link": "", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "num_parameters": 138000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 65536, + "owner_userid": null, + "config": { + "prompt_format": null, + "stop": [""], + "chat_template_name": null, + "chat_template": null + }, + "pricing": { "input": 300, "output": 300, "hourly": 0 }, + "created_at": "2024-04-10T09:45:11.291Z", + "update_at": "2024-04-10T09:45:11.291Z", + "instances": [{ "avzone": "us-central-5b", "cluster": "blusterybull" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xB5c9cBC845ecB19eF96AfD7c685C249063D045B9": 1 }, + "asks_updated": "2024-05-10T22:30:33.867778162Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 765.5333333333333, + "throughput_out": 17.733333333333334, + "stats": [ + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.14814814814814814, + "qps": 0.13333333333333333, + "throughput_in": 765.5333333333333, + "throughput_out": 17.733333333333334, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 
0 }, + "_id": "6577af4434e6c1e2bb5283d8", + "name": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "display_name": "Mixtral-8x7B Instruct v0.1", + "display_type": "chat", + "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "license": "apache-2.0", + "link": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}", + "add_generation_prompt": true + }, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2023-12-12T00:54:28.108Z", + "update_at": "2024-02-08T07:58:24.624Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [ + { "avzone": "us-central-5b", "cluster": "blusterybull" }, + { "avzone": "us-south-1a", "cluster": "mustymarfa" }, + { "avzone": "us-central-6a", "cluster": "mirthfulproxy2" } + ], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 9, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x087B53d30E40D00f2f9FB6b5320F869b0440b3d2": 1, + "0x1f9b37D43762A2E68f79f27037970F252Ae9dc72": 1, + "0x278DE515De2340c72109ef8DaEf2142cD50dE05a": 1, + "0x304C274001CFe1eE95a69F28aC7Bd2DE696Fe31F": 1, + "0x331ad91912c531dCC1c9dF21d624D05A83FA8798": 1, + "0x3Fb77Dfc9Fb62f547C877eeD099836F714862e75": 1, + "0x6E7Bea97F507e915D455f7480a12BFFbD50f8F0B": 1, + "0xDc4d873003AE654ed69d4B2c460d9525F0B82322": 1, + "0xEdb6fdfbcb1Fb0438275066e5314D44252A54A5c": 1, + "0xd40bD5046cfDC4AcB83DD0c37c0Bae8761c77785": 1, + "0xfa3161803d23a65ffcD5f31d1aA17e8A77c9F416": 1 + }, + "asks_updated": "2024-05-11T00:35:53.627294183Z", + "gpus": { "": 0 }, + "qps": 3, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 11466.333333333334, + "throughput_out": 282.9333333333333, + "stats": [ + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0.07692307692307691, + "qps": 0.8, + "throughput_in": 1433.1333333333334, + "throughput_out": 69.33333333333333, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-south-1a", + "cluster": "mustymarfa", + "capacity": 0.09677419354838712, + "qps": 0.9333333333333333, + "throughput_in": 4351.6, + "throughput_out": 78, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0.08333333333333331, + "qps": 1.2666666666666666, + "throughput_in": 5681.6, + "throughput_out": 135.6, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6577bf1034e6c1e2bb5283d9", + "name": "mistralai/Mixtral-8x7B-v0.1", + "display_name": "Mixtral-8x7B v0.1", + "display_type": "language", + "description": "The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts.", + "license": "apache-2.0", + "link": "https://huggingface.co/mistralai/Mixtral-8x7B-v0.1", + "creator_organization": "mistralai", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "56000000000", + 
"show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "pricing": { "input": 150, "output": 150, "hourly": 0 }, + "created_at": "2023-12-12T02:01:52.674Z", + "update_at": "2024-02-08T07:58:39.848Z", + "autopilot_pool": "cr-a100-80-2x", + "instances": [{ "avzone": "us-central-6a", "cluster": "mirthfulproxy2" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xc261f35de549E945122BFd444d947873cb8ca48c": 1 }, + "asks_updated": "2024-05-10T15:20:17.076895388Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-6a", + "cluster": "mirthfulproxy2", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "657b7a2a84ef58c3562de91e", + "name": "openchat/openchat-3.5-1210", + "display_name": "OpenChat 3.5", + "display_type": "chat", + "description": "A merge of OpenChat 3.5 was trained with C-RLFT on a collection of publicly available high-quality instruction data, with a custom processing pipeline.", + "license": "apache-2.0", + "link": "https://huggingface.co/openchat/openchat-3.5-1210", + "creator_organization": "OpenChat", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "7000000000", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "chat_template": "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'] + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}", + "stop": ["<|end_of_turn|>", ""], + "add_generation_prompt": true, + "bos_token": "", + "prompt_format": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-12-14T21:56:58.576Z", + "update_at": "2023-12-14T21:56:58.576Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x7c2e432720fC11Cd177eFf01BD7Fb55B705EFB2E": 1 }, + "asks_updated": "2024-05-10T16:30:29.082656593Z", + "gpus": { "": 0 }, + "qps": 0.3333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 521.8, + "throughput_out": 51.333333333333336, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.03667953667953668, + "qps": 0.3333333333333333, + "throughput_in": 521.8, + "throughput_out": 51.333333333333336, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aced5c227f790586239d2b", + "name": "prompthero/openjourney", + "display_name": "Openjourney v4", + "display_type": "image", + "description": "An open source Stable Diffusion model fine tuned model on Midjourney images. 
", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/prompthero/openjourney", + "creator_organization": "Prompt Hero", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { "height": 512, "width": 512, "steps": 20, "number_of_images": 2, "seed": 42 }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:49:16.586Z", + "update_at": "2023-07-11T05:49:16.586Z", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 1, + "num_running": 1, + "asks": { + "0x5C5b60Ea2C7046FDdf7F7be3853d046301334a85": 1, + "0xB2bFeaa446Cc0376249ed2d7a8f5C32E0705e556": 1 + }, + "asks_updated": "2024-05-11T09:07:32.233340141Z", + "gpus": { "NVIDIA A40": 2 }, + "options": { "input=text,image": 2 }, + "qps": 0.013067961, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.23658435 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece1", + "name": "runwayml/stable-diffusion-v1-5", + "display_name": "Stable Diffusion 1.5", + "display_type": "image", + "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/runwayml/stable-diffusion-v1-5", + "creator_organization": "Runway ML", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { "height": 512, "width": 512, "steps": 20, "number_of_images": 2, "seed": 42 }, + "created_at": "2023-06-23T20:22:43.572Z", + "update_at": "2023-06-23T20:22:43.572Z", + "access": "", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x98D41CFC96e488D9810431B65Aa98EBfc87b73c8": 1 }, + "asks_updated": "2024-05-11T02:38:42.957010375Z", + "gpus": { "NVIDIA A40": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.015545072, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.23383653 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65460075c5ce2e5fa70d6721", + "name": "sentence-transformers/msmarco-bert-base-dot-v5", + "display_name": "Sentence-BERT", + "display_type": "embedding", + "description": "A sentence-transformers model: it maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search.", + "license": "apache-2.0", + "link": "https://huggingface.co/sentence-transformers/msmarco-bert-base-dot-v5", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 110000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 512, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-04T08:27:33.867Z", + "update_at": "2023-12-22T03:15:44.832Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 4, + "num_bids": 0, + 
"num_running": 0, + "asks": { + "0x1a1b0dB24Fdfd5E05AF9177A80fbB0C049a3b63b": 1, + "0x662c7EE2ca9D3D4fAbcEE2286C1bbc5f24CA02fD": 1, + "0x834Dfa4EeF072100CcBC96fA3871d6f62Ce02455": 1, + "0xfE0CBc639aB99C5995B77cBd6aCCB0F29208186D": 1 + }, + "asks_updated": "2024-05-11T03:04:31.159265923Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65b454f3d9877b0bd1376470", + "name": "snorkelai/Snorkel-Mistral-PairRM-DPO", + "display_name": "Snorkel Mistral PairRM DPO (7B)", + "display_type": "chat", + "description": "A state-of-the-art model by Snorkel AI, DPO fine-tuned on Mistral-7B", + "license": "apache-2.0", + "creator_organization": "Snorkel AI", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2024-01-27T00:57:23.638Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-01-27T00:57:23.638Z", + "update_at": "2024-01-27T14:24:41.745Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "hardware_label": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x905d9333Bf36FC9fD26b130adaaEe6f5Bd4E800f": 1 }, + "asks_updated": "2024-05-10T18:09:23.39444282Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.1111111111111111, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acef00227f790586239d3b", + "name": "stabilityai/stable-diffusion-2-1", + "display_name": "Stable Diffusion 2.1", + "display_type": "image", + "description": "Latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", + "license": "openrail++", + "link": "https://huggingface.co/stabilityai/stable-diffusion-2-1", + "creator_organization": "Stability AI", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "created_at": "2023-06-23T20:22:43.572Z", + "update_at": "2023-06-23T20:22:43.572Z", + "access": "", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xC9494f3A014EAC6DD43De5b03E03364F1AcC9ea7": 1 }, + "asks_updated": "2024-05-11T02:40:07.5915129Z", + "gpus": { "NVIDIA A100 80GB PCIe": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.02694962, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 2.008309 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, 
+ "_id": "64c9890c689aa3b286cfcff9", + "name": "stabilityai/stable-diffusion-xl-base-1.0", + "display_name": "Stable Diffusion XL 1.0", + "display_type": "image", + "description": "A text-to-image generative AI model that excels at creating 1024x1024 images.", + "license": "openrail++", + "link": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0", + "creator_organization": "Stability AI", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "config": { + "seed": 1000, + "height": 1024, + "width": 1024, + "steps": 40, + "number_of_images": 4, + "optimized": { + "512x512": "-512-576-1024", + "576x1024": "-512-576-1024", + "1024x576": "-512-576-1024", + "1024x1024": "-512-576-1024" + } + }, + "created_at": "2023-08-01T22:37:00.851Z", + "update_at": "2023-08-01T22:37:00.851Z", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x2E595c6ee5e62FeFF9f426b239a2fB0970476593": 1 }, + "asks_updated": "2024-05-11T02:42:33.99917055Z", + "gpus": { "NVIDIA A100 80GB PCIe": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.018970164, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.9324918 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "653c053fd9679a84df55c4e7", + "name": "teknium/OpenHermes-2-Mistral-7B", + "display_name": "OpenHermes-2-Mistral (7B)", + "display_type": "chat", + "description": "State of the art Mistral Fine-tuned on extensive public datasets", + "license": "Apache-2", + "creator_organization": "teknium", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 7241732096, + "release_date": "2023-10-27T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "pre_prompt": "<|im_start|>system\nYou are thoughtful, helpful, polite, honest, and friendly<|im_end|>\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-27T18:45:19.307Z", + "update_at": "2023-10-27T23:53:05.438Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 2, + "num_running": 2, + "asks": { "0x24e7c0F944a664e4be6890a13Ce3cB0b930a2d9b": 3 }, + "asks_updated": "2024-05-10T15:23:33.984301148Z", + "gpus": { "": 0 }, + "qps": 0.3333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 583.4, + "throughput_out": 66.66666666666667, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.21153846153846154, + "qps": 0.3333333333333333, + "throughput_in": 583.4, + "throughput_out": 66.66666666666667, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "655667fe6664bf7229b2dc6c", + "name": "teknium/OpenHermes-2p5-Mistral-7B", + "display_name": "OpenHermes-2.5-Mistral (7B)", + "display_type": "chat", + 
"description": "Continuation of OpenHermes 2 Mistral model trained on additional code datasets", + "license": "Apache-2", + "creator_organization": "teknium", + "hardware_label": "A40", + "pricing_tier": "Featured", + "num_parameters": 7241732096, + "release_date": "2023-11-15T00:00:00.000Z", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 8192, + "config": { + "stop": ["<|im_end|>", "<|im_start|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "add_generation_prompt": true, + "chat_template_name": "default" + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-11-16T19:05:34.976Z", + "update_at": "2023-11-16T19:12:24.883Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x0C761d48a95cFbc1969e97099a212D033AfE4c32": 1, + "0x478820b73D0745eBFB0E01a0de2c989dA2bC2fD8": 1 + }, + "asks_updated": "2024-05-11T07:42:07.630477632Z", + "gpus": { "": 0 }, + "qps": 2.6666666666666665, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3478.8, + "throughput_out": 391.1333333333333, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.46472019464720205, + "qps": 2.6666666666666665, + "throughput_in": 3478.8, + "throughput_out": 391.1333333333333, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece3", + "name": "togethercomputer/GPT-JT-Moderation-6B", + "display_name": "GPT-JT-Moderation (6B)", + "display_type": "language", + "description": "This model can be used to moderate other chatbot models. Built using GPT-JT model fine-tuned on Ontocord.ai's OIG-moderation dataset v0.1.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/GPT-JT-Moderation-6B", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 6700000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "chat_template": "Possible labels:\n1. casual\n2. needs caution\n3. needs intervention\n4. possibly needs caution\n5. probably needs caution\nInput: Hi!\nOutput: casual\nInput: I want to kill my cat.\nOutput: needs intervention\nInput: How to poison my dog?\nOutput: needs intervention\nInput: I don't want to talk to my brother\nOutput: probably needs caution\nInput: Today we had a disagreement and \nwe got into a screaming fight. I don't want to talk.\nOutput: probably needs caution\nInput: My brother is being an asshole... 
Fuck him.\nOutput: needs caution\nInput: Hello.\nOutput: casual\nInput: {% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}\nOutput:", + "safety_label": "casual", + "safety_config": { "temperature": 0.2, "top_p": 1, "max_tokens": 10, "stop": ["\n"] } + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.657Z", + "update_at": "2023-06-23T20:22:43.657Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "testytiger" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3B2a3D16B0207aA9F14726eeE8cD0b9b388209A2": 1 }, + "asks_updated": "2024-05-10T17:30:19.429822841Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "testytiger", + "capacity": 0.05, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64c28e8742fa06a9511509d1", + "name": "togethercomputer/LLaMA-2-7B-32K", + "display_name": "LLaMA-2-32K (7B)", + "display_type": "language", + "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations.", + "license": "Meta license", + "link": "https://huggingface.co/togethercomputer/LLaMA-2-7B-32K", + "creator_organization": "Together", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { "stop": ["\n\n\n\n", "<|endoftext|>"], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-27T15:34:31.581Z", + "update_at": "2023-08-17T17:07:36.346Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x371EAf96c8Ee3BA499F0288c75c75d51112b2527": 1 }, + "asks_updated": "2024-05-11T09:43:39.29703399Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64de96090d052d10425df3c9", + "name": "togethercomputer/Llama-2-7B-32K-Instruct", + "display_name": "LLaMA-2-7B-32K-Instruct (7B)", + "display_type": "chat", + "description": "Extending LLaMA-2 to 32K context, built with Meta's Position Interpolation and Together AI's data recipe and system optimizations, instruction tuned by Together", + "license": "Meta license", + "link": "https://huggingface.co/togethercomputer/Llama-2-7B-32K-Instruct", + "creator_organization": "Together", + "hardware_label": "2X A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "prompt_format": "[INST]\n {prompt} \n[/INST]\n\n", + "stop": ["[INST]", "\n\n"], + "chat_template_name": "llama" + }, 
+ "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-27T15:34:31.581Z", + "update_at": "2023-08-17T17:07:36.346Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "isFinetuned": false, + "descriptionLink": "", + "depth": { + "num_asks": 6, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x5EE34cdD225705f90B9e7F5CD9502B856f6c87BA": 1, + "0x6B5fee8e0895Bfa4AC5E9CCE41A6675899BEd4DF": 1, + "0x6fD51Dd59783f9FD230d1c7774AD36e9B7581857": 1, + "0x97c68E4Be603a6c4Bd4DDFa79B250B4EA92301cE": 1, + "0xa9dA26E9cd4a33BEd521d35f632c9b7bf0fBB712": 1, + "0xfa66a48eA14D80e094D57E0ECdF22Fc0B397D193": 1 + }, + "asks_updated": "2024-05-11T10:28:34.70870094Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 1, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aeceb", + "name": "togethercomputer/RedPajama-INCITE-7B-Base", + "display_name": "RedPajama-INCITE (7B)", + "display_type": "language", + "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).", + "descriptionLink": "https://www.together.xyz/blog/redpajama-models-v1", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Base", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "6857302016", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.033Z", + "update_at": "2023-06-23T20:22:44.033Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x3665A75f9d8F32a7721B1b43c4CA2eC18F7bdDD3": 1 }, + "asks_updated": "2024-05-11T02:42:29.667700181Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aeced", + "name": "togethercomputer/RedPajama-INCITE-7B-Chat", + "display_name": "RedPajama-INCITE Chat (7B)", + "display_type": "chat", + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-7B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat", + "creator_organization": "Together", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "6857302016", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "prompt_format": ": {prompt}\n:", + "stop": [""], + "chat_template_name": "gpt", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + 
"created_at": "2023-06-23T20:22:44.190Z", + "update_at": "2023-06-23T20:22:44.190Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x5e066227A1e7f634cAEFaDc21527340A7E33a8d5": 1 }, + "asks_updated": "2024-05-11T02:42:20.102603502Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecec", + "name": "togethercomputer/RedPajama-INCITE-7B-Instruct", + "display_name": "RedPajama-INCITE Instruct (7B)", + "display_type": "language", + "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-7B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Instruct", + "creator_organization": "Together", + "hardware_label": "A100 80GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "6857302016", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.083Z", + "update_at": "2023-06-23T20:22:44.083Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xf692d4ef74617ec76153cC5D32C3b8b9bD5D2B2C": 1 }, + "asks_updated": "2024-05-11T02:41:42.647429798Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece5", + "name": "togethercomputer/RedPajama-INCITE-Base-3B-v1", + "display_name": "RedPajama-INCITE (3B)", + "display_type": "language", + "description": "Base model that aims to replicate the LLaMA recipe as closely as possible (blog post).", + "descriptionLink": "https://www.together.xyz/blog/redpajama-models-v1", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-Base-3B-v1", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "2775864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.751Z", + "update_at": "2023-06-23T20:22:43.751Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xe03dDF7b87500172ec519A5cf7982166CB27446d": 1 }, + "asks_updated": "2024-05-11T02:39:13.60509537Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + 
"price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece7", + "name": "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "display_name": "RedPajama-INCITE Chat (3B)", + "display_type": "chat", + "description": "Chat model fine-tuned using data from Dolly 2.0 and Open Assistant over the RedPajama-INCITE-Base-3B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "2775864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "add_generation_prompt": true, + "prompt_format": ": {prompt}\n:", + "stop": [""], + "chat_template_name": "gpt" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.839Z", + "update_at": "2023-06-23T20:22:43.839Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x4e6FE2a25309efA6b3279d5FacceA7393Bce9d7d": 1 }, + "asks_updated": "2024-05-11T02:42:34.056810771Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece6", + "name": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "display_name": "RedPajama-INCITE Instruct (3B)", + "display_type": "language", + "description": "Designed for few-shot prompts, fine-tuned over the RedPajama-INCITE-Base-3B-v1 base model.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": "2775864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.796Z", + "update_at": "2023-06-23T20:22:43.796Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x05DcC82776d9dE9f88714a57489F55Fe629253Df": 1 }, + "asks_updated": "2024-05-11T02:40:57.020944885Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + 
"_id": "65735df36923087ddd5a6607", + "name": "togethercomputer/StripedHyena-Hessian-7B", + "display_name": "StripedHyena Hessian (7B)", + "display_type": "language", + "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", + "license": "Apache-2", + "creator_organization": "Together", + "hardware_label": "H100", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-12-08T18:18:27.005Z", + "update_at": "2023-12-08T19:03:32.567Z", + "instances": [{ "avzone": "ap-northeast-1a", "cluster": "optimisticotter" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x67950b5BFae9be9c326c098be6ED4C6eBfDF21AC": 1 }, + "asks_updated": "2024-05-10T17:29:30.55649053Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "ap-northeast-1a", + "cluster": "optimisticotter", + "capacity": 0.03571428571428571, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65735d536923087ddd5a6606", + "name": "togethercomputer/StripedHyena-Nous-7B", + "display_name": "StripedHyena Nous (7B)", + "display_type": "chat", + "description": "A hybrid architecture composed of multi-head, grouped-query attention and gated convolutions arranged in Hyena blocks, different from traditional decoder-only Transformers", + "license": "Apache-2", + "creator_organization": "Together", + "hardware_label": "H100", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "config": { + "stop": ["###", ""], + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + '### Instruction:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'system' %}{{ '### System:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\\n' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ '### Response:\\n' }}{% endif %}{% endfor %}", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-12-08T18:15:47.433Z", + "update_at": "2023-12-08T19:03:11.497Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xDf09DC5Df2B0116b09cB52E358e1bAbdE797c383": 1 }, + "asks_updated": "2024-05-10T22:20:02.651692436Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0.05263157894736842, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": 
[], "order": 0 }, + "_id": "64ace317227f790586239ce2", + "name": "togethercomputer/alpaca-7b", + "display_name": "Alpaca (7B)", + "display_type": "chat", + "description": "Fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ", + "license": "cc-by-nc-4.0", + "link": "https://huggingface.co/tatsu-lab/alpaca-7b-wdiff", + "creator_organization": "Stanford", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 2048, + "config": { + "stop": ["", "###"], + "add_generation_prompt": true, + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:05:27.713Z", + "update_at": "2023-07-11T05:05:27.713Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xCDb16b84A6C85ceEa98b9A423fFc3DB6c94B79ba": 1 }, + "asks_updated": "2024-05-11T02:41:04.792218224Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65df8df823e6726c2d053851", + "name": "togethercomputer/evo-1-131k-base", + "display_name": "Evo-1 Base (131K)", + "display_type": "language", + "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. 
Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/evo-1-131k-base", + "creator_organization": "Together", + "pricing_tier": "Featured", + "num_parameters": 6450000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 131073, + "pricing": { "input": 500, "output": 500, "hourly": 0 }, + "created_at": "2024-02-28T19:48:08.106Z", + "update_at": "2024-02-28T19:48:08.106Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x23e316184d0b6427b720e31f9390617C2094C570": 1 }, + "asks_updated": "2024-05-10T16:39:02.557083364Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65df8d9623e6726c2d053850", + "name": "togethercomputer/evo-1-8k-base", + "display_name": "Evo-1 Base (8K)", + "display_type": "language", + "description": "Evo is a biological foundation model capable of long-context modeling and design. Evo uses the StripedHyena architecture to enable modeling of sequences at a single-nucleotide, byte-level resolution with near-linear scaling of compute and memory relative to context length. 
Evo has 7 billion parameters and is trained on OpenGenome, a prokaryotic whole-genome dataset containing ~300 billion tokens.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/evo-1-8k-base", + "creator_organization": "Together", + "pricing_tier": "Featured", + "num_parameters": 6450000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "input": 500, "output": 500, "hourly": 0 }, + "created_at": "2024-02-28T19:46:30.585Z", + "update_at": "2024-04-19T18:58:00.962Z", + "instances": [{ "avzone": "us-central-5a", "cluster": "wrigleycub" }], + "isPrivate": false, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x80D5014da1fBfBe7069d979125Ebc6EFB4e21446": 1 }, + "asks_updated": "2024-05-10T11:50:39.384174189Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-5a", + "cluster": "wrigleycub", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6553b8da6664bf7229b2dbfb", + "name": "togethercomputer/m2-bert-80M-2k-retrieval", + "display_name": "M2-BERT-Retrieval-2K", + "display_type": "embedding", + "description": "M2-BERT from the Monarch Mixer paper fine-tuned for retrieval", + "license": "Apache-2", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "num_parameters": 80000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-14T18:13:46.901Z", + "update_at": "2024-02-21T20:06:27.968Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x49c8561e8042ef95A4b011A0AB216d6171aAb80a": 1, + "0xD797d4629f4ec41203c03e9F417A1b26C165429c": 1 + }, + "asks_updated": "2024-05-11T02:57:23.797140598Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6585058be7e2e898e81b5401", + "name": "togethercomputer/m2-bert-80M-32k-retrieval", + "display_name": "M2-BERT-Retrieval-32k", + "display_type": "embedding", + "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/m2-bert-80M-32k-retrieval", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 80000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 32768, + "pricing": { "hourly": 0, "input": 2, 
"output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-04T17:57:24.532Z", + "update_at": "2023-11-04T17:57:24.532Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x642D1948083206E1B93C913F7540075bAAbdeE09": 1, + "0xac486C2C89f7954b1Eb96281162268cFcCF8b113": 1, + "0xc6B6cfB1A480437a012553De16eD296a73a8fB68": 1 + }, + "asks_updated": "2024-05-11T02:48:07.105736356Z", + "gpus": { "": 0 }, + "qps": 0.06666666666666667, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 3360.0666666666666, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.18125, + "qps": 0.06666666666666667, + "throughput_in": 3360.0666666666666, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65468604c5ce2e5fa70d6722", + "name": "togethercomputer/m2-bert-80M-8k-retrieval", + "display_name": "M2-BERT-Retrieval-8k", + "display_type": "embedding", + "description": "The 80M checkpoint for M2-BERT-base from the paper Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture with sequence length 8192, and it has been fine-tuned for retrieval.", + "license": "apache-2.0", + "link": "https://huggingface.co/togethercomputer/m2-bert-80M-8k-retrieval", + "creator_organization": "Together", + "hardware_label": "L40", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 80000000, + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 8192, + "pricing": { "hourly": 0, "input": 2, "output": 2, "finetune": 0, "base": 0 }, + "created_at": "2023-11-04T17:57:24.532Z", + "update_at": "2023-11-04T17:57:24.532Z", + "instances": [{ "avzone": "us-central-1a", "cluster": "sassyseal" }], + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x3aA308521998f41e8B32704d6572468904c5068c": 1, + "0x3d324467D9c6F36d90247D092f4CF7c3409E4EC7": 1, + "0x5E3b1DA7f7b563d68126FB910d08a1afECdd4365": 1 + }, + "asks_updated": "2024-05-11T02:51:12.377041702Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-1a", + "cluster": "sassyseal", + "capacity": 0.0078125, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "657f7552a9c4049b6a42e4c6", + "name": "upstage/SOLAR-10.7B-Instruct-v1.0", + "display_name": "Upstage SOLAR Instruct v1 (11B)", + "display_type": "chat", + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "license": "cc-by-nc-4.0", + "creator_organization": "upstage", + "hardware_label": "A100B", + "pricing_tier": "Featured", + "num_parameters": 10700000000, + "release_date": "2023-12-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{{'<|im_start|>'}}{% if message['role'] == 'user' %}{{'user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'assistant\n' + 
message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'system' %}{{'system\n' + message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + }, + "pricing": { "input": 75, "output": 75 }, + "created_at": "2023-12-17T22:25:22.252Z", + "update_at": "2023-12-17T22:32:58.075Z", + "instances": [{ "avzone": "us-east-2a", "cluster": "jumpyjackal" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0x6e7b83610040F22561593472a4A239022A6fc7CE": 1 }, + "asks_updated": "2024-05-11T02:25:27.512989257Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-2a", + "cluster": "jumpyjackal", + "capacity": 0.023255813953488372, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace3af227f790586239ce6", + "name": "wavymulder/Analog-Diffusion", + "display_name": "Analog Diffusion", + "display_type": "image", + "description": "Dreambooth model trained on a diverse set of analog photographs to provide an analog film effect. ", + "license": "creativeml-openrail-m", + "link": "https://huggingface.co/wavymulder/Analog-Diffusion", + "creator_organization": "Wavymulder", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 0, + "show_in_playground": true, + "isFeaturedModel": true, + "external_pricing_url": "https://www.together.xyz/apis#pricing", + "created_at": "2023-07-11T05:07:59.364Z", + "update_at": "2023-07-11T05:07:59.364Z", + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 }, + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "asks": { "0xC830b3583bcA51887185318c0184fbdB622A55f5": 1 }, + "asks_updated": "2024-05-11T03:33:35.26300478Z", + "gpus": { "NVIDIA A40": 1 }, + "options": { "input=text,image": 1 }, + "qps": 0.012988208, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 0.23378775 + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "656a79054d805f78df5fd530", + "name": "zero-one-ai/Yi-34B-Chat", + "display_name": "01-ai Yi Chat (34B)", + "display_type": "chat", + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "license": "yi-license", + "creator_organization": "01.AI", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 34000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["<|im_start|>", "<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n", + "chat_template_name": "default" + }, + "pricing": { "input": 200, "output": 200, "base": 0 }, + "created_at": "2023-12-02T00:23:33.685Z", + "update_at": "2023-12-02T00:26:55.827Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x3faF4AfA52f2A5951B4bde877478B0BF4d69a023": 1, + 
"0x450aDd660C1B1fdB2A7f6bDAE850C4850594FbCD": 1, + "0x454Eef2b7f085F0134db5c728ac382aD0c4C9511": 1 + }, + "asks_updated": "2024-05-11T05:55:38.392763822Z", + "gpus": { "": 0 }, + "qps": 0.13333333333333333, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "throughput_in": 128.73333333333332, + "throughput_out": 34.266666666666666, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0.2, + "qps": 0.13333333333333333, + "throughput_in": 128.73333333333332, + "throughput_out": 34.266666666666666, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "656fa3548d9fd20968de9ba7", + "name": "zero-one-ai/Yi-34B", + "display_name": "01-ai Yi Base (34B)", + "display_type": "language", + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "license": "yi-license", + "creator_organization": "01.AI", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 34000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "pricing": { "input": 200, "output": 200 }, + "created_at": "2023-12-05T22:25:24.982Z", + "update_at": "2023-12-05T22:51:15.306Z", + "instances": [{ "avzone": "us-east-1a", "cluster": "happypiglet" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 3, + "num_bids": 0, + "num_running": 0, + "asks": { + "0x09c253d0c4aB07a89D2f7d23A57eA31bdc760c54": 1, + "0x964972F1A61F8BAdbD6163b9888D284CC2E054E9": 1, + "0x9B35c58ef3E3425dEa8CBE5f39b8050e40193F68": 1 + }, + "asks_updated": "2024-05-11T12:20:16.94106496Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6570718281b9e1cf0455ec53", + "name": "zero-one-ai/Yi-6B", + "display_name": "01-ai Yi Base (6B)", + "display_type": "language", + "description": "The Yi series models are large language models trained from scratch by developers at 01.AI", + "license": "yi-license", + "creator_organization": "01.AI", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 6000000000, + "release_date": "2023-11-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": true, + "context_length": 4096, + "pricing": { "input": 50, "output": 50 }, + "created_at": "2023-12-06T13:05:06.567Z", + "update_at": "2023-12-06T13:07:50.190Z", + "instances": [{ "avzone": "us-central-2a", "cluster": "jollyllama" }], + "access": "", + "link": "", + "descriptionLink": "", + "depth": { + "num_asks": 2, + "num_bids": 0, + "num_running": 0, + "asks": { + "0xB527b0625620ff3AACCEb84008A7A6684E2d6FbA": 1, + "0xf2337a3BA04f483bCb3DbF43584a398e83E20368": 1 + }, + "asks_updated": "2024-05-11T02:56:40.830191037Z", + "gpus": { "": 0 }, + "qps": 0, + "permit_required": false, + "price": { "base": 0, "finetune": 0, "hourly": 0, "input": 0, "output": 0 }, + "stats": [ + { + "avzone": "us-central-2a", + "cluster": "jollyllama", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, 
+ { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "66313f416fbdf5d304b833d1", + "name": "togethercomputer/Llama-3-8b-chat-hf-int4", + "display_name": "Llama3 8B Chat HF INT4", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T06:07:59.041Z", + "update_at": "2024-04-24T19:14:26.075Z", + "instances": [ + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" } + ], + "isPrivate": true, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6630011e324b0032b64f35a0", + "name": "togethercomputer/Llama-3-8b-chat-hf-int8", + "display_name": "Togethercomputer Llama3 8B Instruct Int8", + "display_type": "chat", + "description": "Llama 3 is an auto-regressive language model that uses an optimized transformer architecture. 
The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.", + "license": "Llama-3 (Other)", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", + "creator_organization": "Meta", + "pricing_tier": null, + "num_parameters": 8000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 8192, + "owner_userid": null, + "config": { + "stop": ["<|eot_id|>"], + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}", + "bos_token": "<|begin_of_text|>", + "eos_token": "<|end_of_text|>", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2024-04-18T06:07:59.041Z", + "update_at": "2024-04-24T19:14:26.075Z", + "instances": [ + { "avzone": "us-east-1a", "cluster": "happypiglet" }, + { "avzone": "us-central-5b", "cluster": "blusterybull" } + ], + "isPrivate": true, + "access_control": [], + "isDedicatedInstance": false, + "isFinetuned": false, + "access": "", + "hardware_label": "", + "descriptionLink": "", + "depth": { + "num_asks": 1, + "num_bids": 0, + "num_running": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0, + "stats": [ + { + "avzone": "us-east-1a", + "cluster": "happypiglet", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + }, + { + "avzone": "us-central-5b", + "cluster": "blusterybull", + "capacity": 0, + "qps": 0, + "throughput_in": 0, + "throughput_out": 0, + "error_rate": 0, + "retry_rate": 0 + } + ] + } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecbe", + "name": "EleutherAI/pythia-1b-v0", + "display_name": "Pythia (1B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 1000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.925Z", + "update_at": "2023-06-23T20:22:41.925Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "649e1ccca073332e47742415", + "name": "replit/replit-code-v1-3b", + "display_name": "Replit-Code-v1 (3B)", + "display_type": "code", + "description": "replit-code-v1-3b is a 2.7B Causal Language Model focused on Code Completion. 
The model has been trained on a subset of the Stack Dedup v1.2 dataset.", + "license": "", + "link": "", + "creator_organization": "Replit", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "limited", + "num_parameters": 3000000000, + "release_date": "2023-04-26T00:00:00.000Z", + "show_in_playground": "true", + "isFeaturedModel": false, + "context_length": 2048, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-30T00:07:40.594Z", + "update_at": "2023-07-07T20:09:09.965Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecee", + "name": "togethercomputer/Pythia-Chat-Base-7B-v0.16", + "display_name": "Pythia-Chat-Base (7B)", + "display_type": "chat", + "description": "Chat model based on EleutherAI’s Pythia-7B model, and is fine-tuned with data focusing on dialog-style interactions.", + "license": "", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "prompt_format": "<human>: {prompt}\n<bot>:", + "stop": ["<human>"], + "chat_template_name": "gpt" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.251Z", + "update_at": "2023-06-23T20:22:44.251Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceada227f790586239d11", + "name": "mosaicml/mpt-7b", + "display_name": "MPT (7B)", + "display_type": "language", + "description": "Decoder-style transformer pretrained from scratch on 1T tokens of English text and code.", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|endoftext|>"], + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:38:34.852Z", + "update_at": "2023-07-15T03:06:20.780Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceb0e227f790586239d12", + "name": "togethercomputer/mpt-30b-chat", + "display_name": "MPT-Chat (30B)", + "display_type": "chat", + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 30000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-07-11T05:39:26.078Z", + "update_at": "2023-07-11T05:39:26.078Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace6df227f790586239cfc", + "name": "google/flan-t5-xl", + "display_name": "Flan T5 XL (3B)", + "display_type": "language", +
"description": "T5 fine-tuned on more than 1000 additional tasks covering also more languages, making it better than T5 at majority of tasks. ", + "license": "", + "link": "", + "creator_organization": "Google", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 512, + "config": { "chat_template_name": "default" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.261Z", + "update_at": "2023-06-23T20:22:42.261Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acebe0227f790586239d17", + "name": "NumbersStation/nsql-6B", + "display_name": "NSQL (6B)", + "display_type": "language", + "description": "Foundation model designed specifically for SQL generation tasks. Pre-trained for 3 epochs and fine-tuned for 10 epochs.", + "license": "", + "creator_organization": "Numbers Station", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 6000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "default" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:42:56.540Z", + "update_at": "2023-07-11T05:42:56.540Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace9ca227f790586239d09", + "name": "togethercomputer/Koala-7B", + "display_name": "Koala (7B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt} GPT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'GPT: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ 'GPT:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:34:02.521Z", + "update_at": "2023-07-11T05:34:02.521Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc0", + "name": "EleutherAI/pythia-6.9b", + "display_name": "Pythia (6.9B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "num_parameters": 6900000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.044Z", + "update_at": "2023-06-23T20:22:42.044Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecb8", + "name": "databricks/dolly-v2-12b", + "display_name": "Dolly v2 (12B)", + "display_type": "chat", + 
"description": "An instruction-following LLM based on pythia-12b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "license": "", + "link": "", + "creator_organization": "Databricks", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 12000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["### End"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.607Z", + "update_at": "2023-06-23T20:22:41.607Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecb6", + "name": "databricks/dolly-v2-3b", + "display_name": "Dolly v2 (3B)", + "display_type": "chat", + "description": "An instruction-following LLM based on pythia-3b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "license": "", + "link": "", + "creator_organization": "Databricks", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["### End"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.524Z", + "update_at": "2023-06-23T20:22:41.524Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc2", + "name": "EleutherAI/gpt-neox-20b", + "display_name": "GPT-NeoX (20B)", + "display_type": "language", + "description": "Autoregressive language model trained on the Pile. 
Its architecture intentionally resembles that of GPT-3, and is almost identical to that of GPT-J 6B.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 20000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.132Z", + "update_at": "2023-06-23T20:22:42.132Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecbf", + "name": "EleutherAI/pythia-2.8b-v0", + "display_name": "Pythia (2.8B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "num_parameters": 2800000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.975Z", + "update_at": "2023-06-23T20:22:41.975Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acebb2227f790586239d16", + "name": "NousResearch/Nous-Hermes-13b", + "display_name": "Nous Hermes (13B)", + "display_type": "language", + "description": "LLaMA 13B fine-tuned on over 300,000 instructions. Designed for long responses, low hallucination rate, and absence of censorship mechanisms.", + "license": "", + "link": "", + "creator_organization": "Nous Research", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:42:10.444Z", + "update_at": "2023-07-11T05:42:10.444Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace8d1227f790586239d03", + "name": "togethercomputer/guanaco-65b", + "display_name": "Guanaco (65B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. 
Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Supported", + "access": "open", + "num_parameters": 65000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-11T05:29:53.740Z", + "update_at": "2023-07-11T05:29:53.740Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e5", + "name": "togethercomputer/llama-2-7b", + "display_name": "LLaMA-2 (7B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "renamed": "meta-llama/Llama-2-7b-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acf031227f790586239d44", + "name": "lmsys/fastchat-t5-3b-v1.0", + "display_name": "Vicuna-FastChat-T5 (3B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning Flan-t5-xl on user-shared conversations collected from ShareGPT.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 512, + "config": { + "stop": ["###", "</s>"], + "prompt_format": "### Human: {prompt}\n### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + '\n' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-07-11T06:01:21.713Z", + "update_at": "2023-07-11T06:01:21.713Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea6e227f790586239d0e", + "name": "huggyllama/llama-7b", + "display_name": "LLaMA (7B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture.
The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:36:46.255Z", + "update_at": "2023-07-11T05:36:46.255Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc9", + "name": "OpenAssistant/stablelm-7b-sft-v7-epoch-3", + "display_name": "Open-Assistant StableLM SFT-7 (7B)", + "display_type": "chat", + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", + "license": "", + "link": "", + "creator_organization": "LAION", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["<|endoftext|>"], + "prompt_format": "<|prompter|>{prompt}<|endoftext|><|assistant|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompter|>' + message['content'] + '<|endoftext|>' }}{% else %}{{ '<|assistant|>' + message['content'] + '<|endoftext|>\n' }}{% endif %}{% endfor %}{{ '<|assistant|>' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.425Z", + "update_at": "2023-06-23T20:22:42.425Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc1", + "name": "EleutherAI/pythia-12b-v0", + "display_name": "Pythia (12B)", + "display_type": "language", + "description": "The Pythia Scaling Suite is a collection of models developed to facilitate interpretability research.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 12000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["<|endoftext|>"], "chat_template_name": "gpt" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.091Z", + "update_at": "2023-06-23T20:22:42.091Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceb28227f790586239d13", + "name": "togethercomputer/mpt-7b-chat", + "display_name": "MPT-Chat (7B)", + "display_type": "chat", + "description": "Chat model for dialogue generation finetuned on ShareGPT-Vicuna, Camel-AI, GPTeacher, Guanaco, Baize and some generated datasets.", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|im_end|>"], + "prompt_format": "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:39:52.024Z", + "update_at": 
"2023-07-11T05:39:52.024Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecbc", + "name": "EleutherAI/gpt-j-6b", + "display_name": "GPT-J (6B)", + "display_type": "language", + "description": "Transformer model trained using Ben Wang's Mesh Transformer JAX. ", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 6000000000, + "release_date": "2021-06-04T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.831Z", + "update_at": "2023-06-23T20:22:41.831Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc8", + "name": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + "display_name": "Open-Assistant Pythia SFT-4 (12B)", + "display_type": "chat", + "description": "Chat-based and open-source assistant. The vision of the project is to make a large language model that can run on a single high-end consumer GPU. ", + "license": "", + "link": "", + "creator_organization": "LAION", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 12000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|endoftext|>"], + "prompt_format": "<|prompter|>{prompt}<|endoftext|><|assistant|>", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompter|>' + message['content'] + '<|endoftext|>' }}{% else %}{{ '<|assistant|>' + message['content'] + '<|endoftext|>\n' }}{% endif %}{% endfor %}{{ '<|assistant|>' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.383Z", + "update_at": "2023-06-23T20:22:42.383Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acf013227f790586239d43", + "name": "lmsys/vicuna-7b-v1.3", + "display_name": "Vicuna v1.3 (7B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. 
Auto-regressive model, based on the transformer architecture.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["</s>"], + "prompt_format": "USER: {prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T06:00:51.553Z", + "update_at": "2023-07-11T06:00:51.553Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5cc", + "name": "Phind/Phind-CodeLlama-34B-Python-v1", + "display_name": "Phind Code LLaMA Python v1 (34B)", + "display_type": "code", + "description": "This model is fine-tuned from CodeLlama-34B-Python and achieves 69.5% pass@1 on HumanEval.", + "license": "", + "creator_organization": "Phind", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 33743970304, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "### Instruction:\n{prompt}\n### Response:\n", + "stop": ["</s>", "###"], + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65ac4e5e75846d9d3ae5b836", + "name": "NumbersStation/nsql-llama-2-7B", + "display_name": "NSQL LLaMA-2 (7B)", + "display_type": "code", + "description": "NSQL is a family of autoregressive open-source large foundation models (FMs) designed specifically for SQL generation tasks", + "link": "", + "creator_organization": "Numbers Station", + "hardware_label": "A100", + "pricing_tier": "Featured", + "num_parameters": 7000000000, + "release_date": "2024-01-20T22:51:10.492Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "pricing": { "hourly": 0, "input": 50, "output": 50, "finetune": 0, "base": 0 }, + "created_at": "2024-01-20T22:51:10.492Z", + "update_at": "2024-01-20T22:59:48.333Z", + "access": "", + "license": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf8", + "name": "NousResearch/Nous-Hermes-Llama2-70b", + "display_name": "Nous Hermes LLaMA-2 (70B)", + "display_type": "chat", + "description": "Nous-Hermes-Llama2-70b is a state-of-the-art language model fine-tuned on over 300,000 instructions.", + "license": "", + "link": "", + "creator_organization": "NousResearch", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 70000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": ["###", "</s>"], + "prompt_format": "### Instruction:\n{prompt}\n\n### Response:\n", + "chat_template_name": "llama", + "chat_template": "{% for message in messages %}{% if
message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:\n' }}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.404Z", + "update_at": "2023-10-24T17:43:39.278Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f67555bc372ce719b97f03", + "name": "WizardLM/WizardLM-70B-V1.0", + "display_name": "WizardLM v1.0 (70B)", + "display_type": "language", + "description": "This model achieves a substantial and comprehensive improvement on coding, mathematical reasoning and open-domain conversation capacities.", + "license": "", + "creator_organization": "WizardLM", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "num_parameters": 70000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt} ASSISTANT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'ASSISTANT:' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-09-05T00:24:53.327Z", + "update_at": "2023-09-05T00:24:53.327Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea57227f790586239d0d", + "name": "huggyllama/llama-65b", + "display_name": "LLaMA (65B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "2x A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 65000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-11T05:36:23.656Z", + "update_at": "2023-07-11T05:36:23.656Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64fbbc5adfdb1e4b06b5d5ce", + "name": "lmsys/vicuna-13b-v1.5-16k", + "display_name": "Vicuna v1.5 16K (13B)", + "display_type": "chat", + "description": "Vicuna is a chat assistant trained by fine-tuning Llama 2 on user-shared conversations collected from ShareGPT.", + "license": "", + "creator_organization": "LM Sys", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13015864320, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "USER: {prompt}\nASSISTANT:", + "stop": [""], + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-09T00:29:14.496Z", + "update_at": "2023-09-09T00:29:14.496Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece4", + "name": "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "display_name": "GPT-NeoXT-Chat-Base 
(20B)", + "display_type": "chat", + "description": "Chat model fine-tuned from EleutherAI’s GPT-NeoX with over 40 million instructions on carbon reduced compute.", + "license": "", + "link": "", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 20000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "prompt_format": ": {prompt}\n:", + "stop": [""], + "chat_template_name": "gpt" + }, + "max_tokens": 995, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.702Z", + "update_at": "2023-06-23T20:22:43.702Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "657bed666aca120ac2af2fb7", + "name": "HuggingFaceH4/zephyr-7b-beta", + "display_name": "Zephyr-7B-ß", + "display_type": "chat", + "description": "A fine-tuned version of Mistral-7B to act as a helpful assistant.", + "license": "", + "link": "", + "creator_organization": "HuggingFace", + "hardware_label": "2x A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 7241732096, + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 32768, + "config": { "stop": ["[INST]", ""], "prompt_format": "[INST] {prompt} [INST]" }, + "created_at": "2023-12-15T06:08:38.925Z", + "update_at": "2023-12-15T06:08:38.925Z", + "isFinetuned": false, + "descriptionLink": "", + "pricing": { "hourly": 0, "input": 0, "output": 0, "base": 0, "finetune": 0 } + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78eba589782acafe1781f", + "name": "togethercomputer/CodeLlama-13b-Python", + "display_name": "Code Llama Python (13B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-20T22:52:59.177Z", + "renamed": "codellama/CodeLlama-13b-Python-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e7", + "name": "togethercomputer/llama-2-13b", + "display_name": "LLaMA-2 (13B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:07:52.318Z", + "renamed": "meta-llama/Llama-2-13b-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78e89589782acafe1781d", + "name": "togethercomputer/CodeLlama-7b-Instruct", + "display_name": "Code Llama Instruct (7B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "renamed": "codellama/CodeLlama-7b-Instruct-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f0de22caa9e2eb543b373b", + "name": "togethercomputer/guanaco-13b", + "display_name": "Guanaco (13B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. 
Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "A40 48GB", + "pricing_tier": "Supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:29:07.717Z", + "update_at": "2023-07-11T05:29:07.717Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e7934a589782acafe17822", + "name": "togethercomputer/CodeLlama-34b-Python", + "display_name": "Code Llama Python (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "renamed": "codellama/CodeLlama-34b-Python-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64aceb6f227f790586239d15", + "name": "mosaicml/mpt-7b-instruct", + "display_name": "MPT-Instruct (7B)", + "display_type": "language", + "description": "Designed for short-form instruction following, finetuned on Dolly and Anthropic HH-RLHF and other datasets", + "license": "", + "link": "", + "creator_organization": "Mosaic ML", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["<|endoftext|>"], + "chat_template_name": "default", + "add_generation_prompt": true + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:41:03.757Z", + "update_at": "2023-07-11T05:41:03.757Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07ea", + "name": "togethercomputer/llama-2-70b-chat", + "display_name": "LLaMA-2 Chat (70B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama" + }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "autopilot_pool": "cr-a100-80-2x", + "renamed": "meta-llama/Llama-2-70b-chat-hf", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e7934a589782acafe17823", + "name": "togethercomputer/CodeLlama-34b-Instruct", + "display_name": "Code Llama Instruct (34B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama", + "tools_template": "{{ '<>\\n' + systemMessage['content'] + '\\n\\nYou can access the following functions. Use them if required -\\n' + tools + '\\n<>\\n\\n' + message['content'] }}" + }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "renamed": "codellama/CodeLlama-34b-Instruct-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e7934a589782acafe17821", + "name": "togethercomputer/CodeLlama-34b", + "display_name": "Code Llama (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "renamed": "codellama/CodeLlama-34b-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecf1", + "name": "Salesforce/codegen2-16B", + "display_name": "CodeGen2 (16B)", + "display_type": "code", + "description": "An autoregressive language models for program synthesis.", + "license": "", + "link": "", + "creator_organization": "Salesforce", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 16000000000, + "release_date": "2022-03-25T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["\n\n"], "chat_template_name": 
"gpt" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.453Z", + "update_at": "2023-06-23T20:22:44.453Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace476227f790586239cef", + "name": "Salesforce/codegen2-7B", + "display_name": "CodeGen2 (7B)", + "display_type": "code", + "description": "An autoregressive language models for program synthesis.", + "license": "", + "link": "", + "creator_organization": "Salesforce", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 7000000000, + "release_date": "2022-03-25T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "stop": ["\n\n"], "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:11:18.328Z", + "update_at": "2023-07-11T05:11:18.328Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1212907e072b8aecc5", + "name": "google/flan-t5-xxl", + "display_name": "Flan T5 XXL (11B)", + "display_type": "language", + "description": "Flan T5 XXL (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).", + "creator_organization": "Google", + "hardware_label": "A40 48GB", + "access": "open", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 512, + "config": { "chat_template_name": "default" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:42.261Z", + "update_at": "2023-09-01T14:35:00.161Z", + "license": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e9", + "name": "togethercomputer/llama-2-70b", + "display_name": "LLaMA-2 (70B)", + "display_type": "language", + "description": "Language model trained on 2 trillion tokens with double the context length of Llama 1. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "2X A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "68976648192", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 225, "output": 225, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "autopilot_pool": "cr-a100-80-2x", + "renamed": "meta-llama/Llama-2-70b-hf", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425f", + "name": "codellama/CodeLlama-7b-hf", + "display_name": "Code Llama (7B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de95e620478cfa14425c", + "name": "codellama/CodeLlama-13b-hf", + "display_name": "Code Llama (13B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-21T01:12:38.916Z", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78eba589782acafe17820", + "name": "togethercomputer/CodeLlama-13b-Instruct", + "display_name": "Code Llama Instruct (13B)", + "display_type": "chat", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "num_parameters": "13016028160", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["", "[INST]"], + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-08-24T17:09:14.381Z", + "update_at": "2023-12-04T05:01:42.539Z", + "renamed": 
"codellama/CodeLlama-13b-Instruct-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e8", + "name": "togethercomputer/llama-2-13b-chat", + "display_name": "LLaMA-2 Chat (13B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "13015864320", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama" + }, + "pricing": { "input": 55, "output": 55, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-12-04T05:00:54.436Z", + "renamed": "meta-llama/Llama-2-13b-chat-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acefe5227f790586239d41", + "name": "lmsys/vicuna-13b-v1.3", + "display_name": "Vicuna v1.3 (13B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. Auto-regressive model, based on the transformer architecture.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt}\nASSISTANT:", + "chat_template": "{% for message in messages %}{{message['role'].toLocaleUpperCase() + ': ' + message['content'] + '\n'}}{% endfor %}{{ 'ASSISTANT:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T06:00:05.166Z", + "update_at": "2023-07-15T03:08:44.173Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea0b227f790586239d0b", + "name": "huggyllama/llama-13b", + "display_name": "LLaMA (13B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:35:07.955Z", + "update_at": "2023-07-11T05:35:07.955Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acefbe227f790586239d40", + "name": "HuggingFaceH4/starchat-alpha", + "display_name": "StarCoderChat Alpha (16B)", + "display_type": "chat", + "description": "Fine-tuned from StarCoder to act as a helpful coding assistant. 
As an alpha release is only intended for educational or research purpopses.", + "license": "", + "link": "", + "creator_organization": "HuggingFaceH4", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 16000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 8192, + "config": { + "stop": ["<|endoftext|>", "<|end|>"], + "prompt_format": "<|system|>\n<|end|>\n<|user|>\n{prompt}<|end|>\n<|assistant|>", + "chat_template_name": "default" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:59:26.298Z", + "update_at": "2023-07-11T05:59:26.298Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acea35227f790586239d0c", + "name": "huggyllama/llama-30b", + "display_name": "LLaMA (30B)", + "display_type": "language", + "description": "An auto-regressive language model, based on the transformer architecture. The model comes in different sizes: 7B, 13B, 33B and 65B parameters.", + "license": "", + "link": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "access": "open", + "num_parameters": 33000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-07-11T05:35:49.870Z", + "update_at": "2023-07-11T05:35:49.870Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1412907e072b8aecf4", + "name": "stabilityai/stablelm-base-alpha-3b", + "display_name": "StableLM-Base-Alpha (3B)", + "display_type": "language", + "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.", + "license": "", + "link": "", + "creator_organization": "Stability AI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 3000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 25, "output": 25, "hourly": 0 }, + "created_at": "2023-06-23T20:22:44.907Z", + "update_at": "2023-06-23T20:22:44.907Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1512907e072b8aecf5", + "name": "stabilityai/stablelm-base-alpha-7b", + "display_name": "StableLM-Base-Alpha (7B)", + "display_type": "language", + "description": "Decoder-only language model pre-trained on a diverse collection of English and Code datasets with a sequence length of 4096.", + "license": "", + "link": "", + "creator_organization": "Stability AI", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:45.249Z", + "update_at": "2023-06-23T20:22:45.249Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64e78e89589782acafe1781c", + "name": "togethercomputer/CodeLlama-7b-Python", + "display_name": "Code Llama Python (7B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling 
capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": "6738546688", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-08-24T17:08:25.379Z", + "update_at": "2023-08-24T17:08:25.379Z", + "renamed": "codellama/CodeLlama-7b-Python-hf", + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64f67987bc372ce719b97f07", + "name": "defog/sqlcoder", + "display_name": "Sqlcoder (15B)", + "display_type": "language", + "description": "Defog's SQLCoder is a state-of-the-art LLM for converting natural language questions to SQL queries, fine-tuned from Bigcode's Starcoder 15B model.", + "license": "", + "creator_organization": "Defog", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 15000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 8192, + "config": { + "stop": ["<|endoftext|>"], + "prompt_format": "### Instructions:\n\n{prompt}\n\n### Response:\n" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-09-05T00:42:47.496Z", + "update_at": "2023-09-05T00:42:47.496Z", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64acef6e227f790586239d3f", + "name": "bigcode/starcoder", + "display_name": "StarCoder (16B)", + "display_type": "code", + "description": "Trained on 80+ coding languages, uses Multi Query Attention, an 8K context window, and was trained using the Fill-in-the-Middle objective on 1T tokens.", + "license": "", + "link": "", + "creator_organization": "BigCode", + "hardware_label": "A100 80GB", + "pricing_tier": "supported", + "num_parameters": 16000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 8192, + "config": { "stop": ["<|endoftext|>", "<|end|>"] }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:58:06.486Z", + "update_at": "2023-07-11T05:58:06.486Z", + "access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1112907e072b8aecb7", + "name": "databricks/dolly-v2-7b", + "display_name": "Dolly v2 (7B)", + "display_type": "chat", + "description": "An instruction-following LLM based on pythia-7b, and trained on ~15k instruction/response fine tuning records generated by Databricks employees.", + "license": "", + "link": "", + "creator_organization": "Databricks", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["### End"], + "prompt_format": "### Instruction:\n{prompt}\n### Response:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + message['content'] + '\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Response:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:41.565Z", + "update_at": "2023-06-23T20:22:41.565Z", + 
"access": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace8a3227f790586239d02", + "name": "togethercomputer/guanaco-33b", + "display_name": "Guanaco (33B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks.", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "A100 80GB", + "pricing_tier": "Supported", + "access": "open", + "num_parameters": 33000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 200, "output": 200, "hourly": 0 }, + "created_at": "2023-07-11T05:29:07.717Z", + "update_at": "2023-07-11T05:29:07.717Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace9b1227f790586239d07", + "name": "togethercomputer/Koala-13B", + "display_name": "Koala (13B)", + "display_type": "chat", + "description": "Chatbot trained by fine-tuning LLaMA on dialogue data gathered from the web.", + "license": "", + "link": "", + "creator_organization": "LM Sys", + "hardware_label": "A40 48GB", + "pricing_tier": "supported", + "access": "open", + "num_parameters": 13000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": [""], + "prompt_format": "USER: {prompt} GPT:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'GPT: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ 'GPT:' }}" + }, + "pricing": { "input": 75, "output": 75, "hourly": 0 }, + "created_at": "2023-07-11T05:33:37.737Z", + "update_at": "2023-07-11T05:33:37.737Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6495ff1312907e072b8aece2", + "name": "togethercomputer/GPT-JT-6B-v1", + "display_name": "GPT-JT (6B)", + "display_type": "language", + "description": "Fork of GPT-J instruction tuned to excel at few-shot prompts (blog post).", + "descriptionLink": "https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai", + "license": "", + "link": "", + "creator_organization": "Together", + "hardware_label": "A40 48GB", + "pricing_tier": "featured", + "access": "open", + "num_parameters": 6700000000, + "release_date": "2022-11-29T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { "chat_template_name": "gpt" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-06-23T20:22:43.617Z", + "update_at": "2023-06-23T20:22:43.617Z" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64b7165fcccc52103e2f07e6", + "name": "togethercomputer/llama-2-7b-chat", + "display_name": "LLaMA-2 Chat (7B)", + "display_type": "chat", + "description": "Llama 2-chat leverages publicly available instruction datasets and over 1 million human annotations. 
Available in three sizes: 7B, 13B and 70B parameters", + "license": "", + "link": "", + "creator_organization": "Meta", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": "6738415616", + "show_in_playground": true, + "finetuning_supported": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "prompt_format": "[INST] {prompt} [/INST]", + "stop": ["[/INST]", ""], + "chat_template_name": "llama" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-18T22:46:55.042Z", + "update_at": "2023-07-18T22:46:55.042Z", + "renamed": "meta-llama/Llama-2-7b-chat-hf", + "hardware_label": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "662b250e246deee9aefbcc50", + "name": "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4", + "display_name": "Upstage SOLAR Instruct v1 (11B)-Int4", + "display_type": "chat", + "description": "Built on the Llama2 architecture, SOLAR-10.7B incorporates the innovative Upstage Depth Up-Scaling", + "license": "", + "creator_organization": "upstage", + "hardware_label": "A100B", + "pricing_tier": "Featured", + "num_parameters": 10700000000, + "release_date": "2023-12-01T00:00:00.000Z", + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { + "add_generation_prompt": true, + "stop": ["<|im_end|>", "<|im_start|>"], + "chat_template": "{% for message in messages %}{{'<|im_start|>'}}{% if message['role'] == 'user' %}{{'user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'assistant\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'system' %}{{'system\n' + message['content'] + '<|im_end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + }, + "pricing": { "input": 75, "output": 75 }, + "created_at": "2024-04-26T03:52:46.866Z", + "update_at": "2024-04-26T03:52:46.866Z", + "instances": [], + "access": "", + "link": "", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "64ace8ed227f790586239d04", + "name": "togethercomputer/guanaco-7b", + "display_name": "Guanaco (7B) ", + "display_type": "chat", + "description": "Instruction-following language model built on LLaMA. Expanding upon the initial 52K dataset from the Alpaca model, an additional 534,530 focused on multi-lingual tasks. ", + "license": "", + "link": "", + "creator_organization": "Tim Dettmers", + "hardware_label": "A40 48GB", + "access": "open", + "num_parameters": 7000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 2048, + "config": { + "stop": ["###"], + "prompt_format": "### Human: {prompt} ### Assistant:", + "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + ' ' }}{% else %}{{ '### Assistant: ' + message['content'] + '\n' }}{% endif %}{% endfor %}{{ '### Assistant:' }}" + }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-07-11T05:30:21.531Z", + "update_at": "2023-07-11T05:30:21.531Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "6532f0faf94bacfc629b4cf7", + "name": "EleutherAI/llemma_7b", + "display_name": "Llemma (7B)", + "display_type": "language", + "description": "Llemma 7B is a language model for mathematics. 
It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens.", + "license": "", + "link": "", + "creator_organization": "EleutherAI", + "hardware_label": "A100 80GB", + "pricing_tier": "Featured", + "access": "open", + "num_parameters": 6738546688, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 4096, + "config": { "chat_template_name": "llama" }, + "pricing": { "input": 50, "output": 50, "hourly": 0 }, + "created_at": "2023-10-20T21:28:26.403Z", + "update_at": "2023-10-24T17:42:38.630Z", + "descriptionLink": "" + }, + { + "modelInstanceConfig": { "appearsIn": [], "order": 0 }, + "_id": "65a6de96e620478cfa144262", + "name": "codellama/CodeLlama-34b-hf", + "display_name": "Code Llama (34B)", + "display_type": "code", + "description": "Code Llama is a family of large language models for code based on Llama 2 providing infilling capabilities, support for large input contexts, and zero-shot instruction following ability for programming tasks.", + "license": "", + "creator_organization": "Meta", + "hardware_label": "A100 80GB", + "num_parameters": 34000000000, + "show_in_playground": true, + "isFeaturedModel": false, + "context_length": 16384, + "config": { "stop": [""], "chat_template_name": "llama" }, + "pricing": { "input": 194, "output": 194, "hourly": 0 }, + "created_at": "2023-08-24T17:28:42.172Z", + "update_at": "2023-08-24T17:28:42.172Z", + "access": "", + "link": "", + "descriptionLink": "" + } +] diff --git a/src/libs/agent-runtime/togetherai/index.test.ts b/src/libs/agent-runtime/togetherai/index.test.ts index e13d62375ea2..ac2fad9ebdfc 100644 --- a/src/libs/agent-runtime/togetherai/index.test.ts +++ b/src/libs/agent-runtime/togetherai/index.test.ts @@ -2,9 +2,10 @@ import OpenAI from 'openai'; import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { ChatStreamCallbacks, LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; +import { LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; import * as debugStreamModule from '../utils/debugStream'; +import models from './fixtures/models.json'; import { LobeTogetherAI } from './index'; const provider = 'togetherai'; @@ -295,4 +296,17 @@ describe('LobeTogetherAI', () => { }); }); }); + + describe('models', () => { + it('should get models', async () => { + vi.spyOn(globalThis, 'fetch').mockResolvedValueOnce({ + json: async () => models, + ok: true, + } as Response); + + const list = await instance.models(); + + expect(list).toMatchSnapshot(); + }); + }); }); diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts index 8f15e880c8c9..9fd320d9cacd 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts @@ -1,3 +1,4 @@ +import { OpenAI } from 'openai'; import { describe, expect, it } from 'vitest'; import { OpenAIChatMessage, UserMessageContentPart } from '../types/chat'; @@ -5,6 +6,7 @@ import { buildAnthropicBlock, buildAnthropicMessage, buildAnthropicMessages, + buildAnthropicTools, } from './anthropicHelpers'; import { parseDataUri } from './uriParser'; @@ -51,6 +53,86 @@ describe('anthropicHelpers', () => { const result = buildAnthropicMessage(message); expect(result).toEqual({ content: [{ type: 'text', text: 'Hello!' 
}], role: 'user' }); + + it('should correctly convert user message with string content', () => { + const message: OpenAIChatMessage = { + content: 'Hello!', + role: 'user', + }; + const result = buildAnthropicMessage(message); + expect(result).toEqual({ content: 'Hello!', role: 'user' }); + }); + + it('should correctly convert user message with content parts', () => { + const message: OpenAIChatMessage = { + content: [ + { type: 'text', text: 'Check out this image:' }, + { type: 'image_url', image_url: { url: 'data:image/png;base64,abc123' } }, + ], + role: 'user', + }; + const result = buildAnthropicMessage(message); + expect(result.role).toBe('user'); + expect(result.content).toHaveLength(2); + expect((result.content[1] as any).type).toBe('image'); + }); + + it('should correctly convert tool message', () => { + const message: OpenAIChatMessage = { + content: 'Tool result content', + role: 'tool', + tool_call_id: 'tool123', + }; + const result = buildAnthropicMessage(message); + expect(result.role).toBe('user'); + expect(result.content).toEqual([ + { + content: 'Tool result content', + tool_use_id: 'tool123', + type: 'tool_result', + }, + ]); + }); + + it('should correctly convert assistant message with tool calls', () => { + const message: OpenAIChatMessage = { + content: 'Here is the result:', + role: 'assistant', + tool_calls: [ + { + id: 'call1', + type: 'function', + function: { + name: 'search', + arguments: '{"query":"anthropic"}', + }, + }, + ], + }; + const result = buildAnthropicMessage(message); + expect(result.role).toBe('assistant'); + expect(result.content).toEqual([ + { text: 'Here is the result:', type: 'text' }, + { + id: 'call1', + input: { query: 'anthropic' }, + name: 'search', + type: 'tool_use', + }, + ]); + }); + + it('should correctly convert function message', () => { + const message: OpenAIChatMessage = { + content: 'def hello(name):\n return f"Hello {name}"', + role: 'function', + }; + const result = buildAnthropicMessage(message); + expect(result).toEqual({ + content: 'def hello(name):\n return f"Hello {name}"', + role: 'assistant', + }); + }); }); describe('buildAnthropicMessages', () => { @@ -111,4 +193,41 @@ describe('anthropicHelpers', () => { ]); }); }); + + describe('buildAnthropicTools', () => { + it('should correctly convert OpenAI tools to Anthropic format', () => { + const tools: OpenAI.ChatCompletionTool[] = [ + { + type: 'function', + function: { + name: 'search', + description: 'Searches the web', + parameters: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + required: ['query'], + }, + }, + }, + ]; + + const result = buildAnthropicTools(tools); + + expect(result).toEqual([ + { + name: 'search', + description: 'Searches the web', + input_schema: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + required: ['query'], + }, + }, + ]); + }); + }); }); diff --git a/src/libs/agent-runtime/utils/debugStream.test.ts b/src/libs/agent-runtime/utils/debugStream.test.ts new file mode 100644 index 000000000000..1b98a3fcb317 --- /dev/null +++ b/src/libs/agent-runtime/utils/debugStream.test.ts @@ -0,0 +1,70 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { debugStream } from './debugStream'; + +describe('debugStream', () => { + let consoleLogSpy: ReturnType<typeof vi.spyOn>; + let consoleErrorSpy: ReturnType<typeof vi.spyOn>; + + beforeEach(() => { + consoleLogSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + }); + +
afterEach(() => { + consoleLogSpy.mockRestore(); + consoleErrorSpy.mockRestore(); + }); + + it('should log stream start and end messages', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue('test chunk'); + controller.close(); + }, + }); + + await debugStream(stream); + + expect(consoleLogSpy).toHaveBeenCalledWith(expect.stringMatching(/^\[stream start\]/)); + }); + + it('should handle and log stream errors', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue('test chunk'); + }, + }); + + await debugStream(stream); + + expect(consoleErrorSpy).toHaveBeenCalledWith('[debugStream error]', expect.any(Error)); + expect(consoleErrorSpy).toHaveBeenCalledWith('[error chunk value:]', 'test chunk'); + }); + + it('should decode ArrayBuffer chunk values', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('test chunk')); + controller.close(); + }, + }); + + await debugStream(stream); + + expect(consoleLogSpy).toHaveBeenCalledWith('test chunk'); + }); + + it('should stringify non-string chunk values', async () => { + const stream = new ReadableStream({ + start(controller) { + controller.enqueue({ test: 'chunk' }); + controller.close(); + }, + }); + + await debugStream(stream); + + expect(consoleLogSpy).toHaveBeenCalledWith('{"test":"chunk"}'); + }); +}); diff --git a/src/libs/agent-runtime/utils/streams/anthropic.test.ts b/src/libs/agent-runtime/utils/streams/anthropic.test.ts new file mode 100644 index 000000000000..2dffff7c34f4 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/anthropic.test.ts @@ -0,0 +1,197 @@ +import type { Stream } from '@anthropic-ai/sdk/streaming'; +import { describe, expect, it, vi } from 'vitest'; + +import { AnthropicStream } from './anthropic'; + +describe('AnthropicStream', () => { + it('should transform Anthropic stream to protocol stream', async () => { + // @ts-ignore + const mockAnthropicStream: Stream = { + [Symbol.asyncIterator]() { + let count = 0; + return { + next: async () => { + switch (count) { + case 0: + count++; + return { + done: false, + value: { + type: 'message_start', + message: { id: 'message_1', metadata: {} }, + }, + }; + case 1: + count++; + return { + done: false, + value: { + type: 'content_block_delta', + delta: { type: 'text_delta', text: 'Hello' }, + }, + }; + case 2: + count++; + return { + done: false, + value: { + type: 'content_block_delta', + delta: { type: 'text_delta', text: ' world!' 
}, + }, + }; + case 3: + count++; + return { + done: false, + value: { + type: 'message_delta', + delta: { stop_reason: 'stop' }, + }, + }; + default: + return { done: true, value: undefined }; + } + }, + }; + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AnthropicStream(mockAnthropicStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: message_1\n', + 'event: data\n', + `data: {"id":"message_1","metadata":{}}\n\n`, + 'id: message_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: message_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: message_1\n', + 'event: stop\n', + `data: "stop"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle tool use event and ReadableStream input', async () => { + const toolUseEvent = { + type: 'content_block_delta', + delta: { + type: 'tool_use', + tool_use: { + id: 'tool_use_1', + name: 'example_tool', + input: { arg1: 'value1' }, + }, + }, + }; + + const mockReadableStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + type: 'message_start', + message: { id: 'message_1', metadata: {} }, + }); + controller.enqueue(toolUseEvent); + controller.enqueue({ + type: 'message_stop', + }); + controller.close(); + }, + }); + + const onToolCallMock = vi.fn(); + + const protocolStream = AnthropicStream(mockReadableStream, { + onToolCall: onToolCallMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: message_1\n', + 'event: data\n', + `data: {"id":"message_1","metadata":{}}\n\n`, + 'id: message_1\n', + 'event: tool_calls\n', + `data: [{"function":{"arguments":"{\\"arg1\\":\\"value1\\"}","name":"example_tool"},"id":"tool_use_1","index":0,"type":"function"}]\n\n`, + 'id: message_1\n', + 'event: stop\n', + `data: "message_stop"\n\n`, + ]); + + expect(onToolCallMock).toHaveBeenCalledTimes(1); + }); + + it('should handle ReadableStream input', async () => { + const mockReadableStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + type: 'message_start', + message: { id: 'message_1', metadata: {} }, + }); + controller.enqueue({ + type: 'content_block_delta', + delta: { type: 'text_delta', text: 'Hello' }, + }); + controller.enqueue({ + type: 'message_stop', + }); + controller.close(); + }, + }); + + const protocolStream = AnthropicStream(mockReadableStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: message_1\n', + 'event: data\n', + `data: {"id":"message_1","metadata":{}}\n\n`, + 'id: message_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: message_1\n', + 'event: stop\n', + `data: "message_stop"\n\n`, + ]); + 
}); +}); diff --git a/src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts b/src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts new file mode 100644 index 000000000000..2032faf73ce3 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/bedrock/llama.test.ts @@ -0,0 +1,196 @@ +import { InvokeModelWithResponseStreamResponse } from '@aws-sdk/client-bedrock-runtime'; +import { Readable } from 'stream'; +import { describe, expect, it, vi } from 'vitest'; + +import * as uuidModule from '@/utils/uuid'; + +import { AWSBedrockLlamaStream } from './llama'; + +describe('AWSBedrockLlamaStream', () => { + it('should transform Bedrock Llama stream to protocol stream', async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1'); + const mockBedrockStream = new ReadableStream({ + start(controller) { + controller.enqueue({ generation: 'Hello', generation_token_count: 1 }); + controller.enqueue({ generation: ' world!', generation_token_count: 2 }); + controller.enqueue({ stop_reason: 'stop' }); + controller.close(); + }, + }); + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_1\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should transform Bedrock Llama AsyncIterator to protocol stream', async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1'); + + const mockBedrockStream: InvokeModelWithResponseStreamResponse = { + body: { + // @ts-ignore + async *[Symbol.asyncIterator]() { + yield { generation: 'Hello', generation_token_count: 1 }; + yield { generation: ' world!', generation_token_count: 2 }; + yield { stop_reason: 'stop' }; + }, + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_1\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle Bedrock response with chunk property', 
async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('2'); + + const mockBedrockStream: InvokeModelWithResponseStreamResponse = { + contentType: 'any', + body: { + // @ts-ignore + async *[Symbol.asyncIterator]() { + yield { + chunk: { + bytes: new TextEncoder().encode('{"generation":"Hello","generation_token_count":1}'), + }, + }; + yield { + chunk: { + bytes: new TextEncoder().encode( + '{"generation":" world!","generation_token_count":2}', + ), + }, + }; + yield { chunk: { bytes: new TextEncoder().encode('{"stop_reason":"stop"}') } }; + }, + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_2\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_2\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_2\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle empty stream', async () => { + const mockBedrockStream = new ReadableStream({ + start(controller) { + controller.close(); + }, + }); + + const protocolStream = AWSBedrockLlamaStream(mockBedrockStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([]); + }); +}); diff --git a/src/libs/agent-runtime/utils/streams/google-ai.test.ts b/src/libs/agent-runtime/utils/streams/google-ai.test.ts new file mode 100644 index 000000000000..7477f86502a2 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/google-ai.test.ts @@ -0,0 +1,97 @@ +import { EnhancedGenerateContentResponse } from '@google/generative-ai'; +import { describe, expect, it, vi } from 'vitest'; + +import * as uuidModule from '@/utils/uuid'; + +import { GoogleGenerativeAIStream } from './google-ai'; + +describe('GoogleGenerativeAIStream', () => { + it('should transform Google Generative AI stream to protocol stream', async () => { + vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1'); + + const mockGenerateContentResponse = (text: string, functionCalls?: any[]) => + ({ + text: () => text, + functionCall: () => functionCalls?.[0], + functionCalls: () => functionCalls, + }) as EnhancedGenerateContentResponse; + + const mockGoogleStream = new ReadableStream({ + start(controller) { + controller.enqueue(mockGenerateContentResponse('Hello')); + + controller.enqueue( + mockGenerateContentResponse('', [{ name: 'testFunction', args: { arg1: 'value1' } }]), + ); + controller.enqueue(mockGenerateContentResponse(' world!')); + controller.close(); + }, + }); + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onToolCallMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = GoogleGenerativeAIStream(mockGoogleStream, { + onStart: onStartMock, + onText: 
onTextMock,
+      onToken: onTokenMock,
+      onToolCall: onToolCallMock,
+      onCompletion: onCompletionMock,
+    });
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([
+      // text
+      'id: chat_1\n',
+      'event: text\n',
+      `data: "Hello"\n\n`,
+
+      // tool call
+      'id: chat_1\n',
+      'event: tool_calls\n',
+      `data: [{"function":{"arguments":"{\\"arg1\\":\\"value1\\"}","name":"testFunction"},"id":"testFunction_0","index":0,"type":"function"}]\n\n`,
+
+      // text
+      'id: chat_1\n',
+      'event: text\n',
+      `data: " world!"\n\n`,
+    ]);
+
+    expect(onStartMock).toHaveBeenCalledTimes(1);
+    expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"');
+    expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"');
+    expect(onTokenMock).toHaveBeenCalledTimes(2);
+    expect(onToolCallMock).toHaveBeenCalledTimes(1);
+    expect(onCompletionMock).toHaveBeenCalledTimes(1);
+  });
+
+  it('should handle empty stream', async () => {
+    const mockGoogleStream = new ReadableStream({
+      start(controller) {
+        controller.close();
+      },
+    });
+
+    const protocolStream = GoogleGenerativeAIStream(mockGoogleStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([]);
+  });
+});
diff --git a/src/libs/agent-runtime/utils/streams/ollama.test.ts b/src/libs/agent-runtime/utils/streams/ollama.test.ts
new file mode 100644
index 000000000000..08202fe78011
--- /dev/null
+++ b/src/libs/agent-runtime/utils/streams/ollama.test.ts
@@ -0,0 +1,77 @@
+import { ChatResponse } from 'ollama/browser';
+import { describe, expect, it, vi } from 'vitest';
+
+import * as uuidModule from '@/utils/uuid';
+
+import { OllamaStream } from './ollama';
+
+describe('OllamaStream', () => {
+  it('should transform Ollama stream to protocol stream', async () => {
+    vi.spyOn(uuidModule, 'nanoid').mockReturnValueOnce('1');
+
+    const mockOllamaStream: AsyncIterable<ChatResponse> = {
+      // @ts-ignore
+      async *[Symbol.asyncIterator]() {
+        yield { message: { content: 'Hello' }, done: false };
+        yield { message: { content: ' world!' 
}, done: false }; + yield { message: { content: '' }, done: true }; + }, + }; + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = OllamaStream(mockOllamaStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: chat_1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: chat_1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: chat_1\n', + 'event: stop\n', + `data: "finished"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle empty stream', async () => { + const mockOllamaStream = { + async *[Symbol.asyncIterator]() {}, + }; + + const protocolStream = OllamaStream(mockOllamaStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([]); + }); +}); diff --git a/src/libs/agent-runtime/utils/streams/openai.test.ts b/src/libs/agent-runtime/utils/streams/openai.test.ts new file mode 100644 index 000000000000..203775d86d59 --- /dev/null +++ b/src/libs/agent-runtime/utils/streams/openai.test.ts @@ -0,0 +1,263 @@ +import { describe, expect, it, vi } from 'vitest'; + +import { OpenAIStream } from './openai'; + +describe('OpenAIStream', () => { + it('should transform OpenAI stream to protocol stream', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { content: 'Hello' }, + index: 0, + }, + ], + id: '1', + }); + controller.enqueue({ + choices: [ + { + delta: { content: ' world!' 
}, + index: 1, + }, + ], + id: '1', + }); + controller.enqueue({ + choices: [ + { + delta: null, + finish_reason: 'stop', + index: 2, + }, + ], + id: '1', + }); + + controller.close(); + }, + }); + + const onStartMock = vi.fn(); + const onTextMock = vi.fn(); + const onTokenMock = vi.fn(); + const onCompletionMock = vi.fn(); + + const protocolStream = OpenAIStream(mockOpenAIStream, { + onStart: onStartMock, + onText: onTextMock, + onToken: onTokenMock, + onCompletion: onCompletionMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: 1\n', + 'event: text\n', + `data: "Hello"\n\n`, + 'id: 1\n', + 'event: text\n', + `data: " world!"\n\n`, + 'id: 1\n', + 'event: stop\n', + `data: "stop"\n\n`, + ]); + + expect(onStartMock).toHaveBeenCalledTimes(1); + expect(onTextMock).toHaveBeenNthCalledWith(1, '"Hello"'); + expect(onTextMock).toHaveBeenNthCalledWith(2, '" world!"'); + expect(onTokenMock).toHaveBeenCalledTimes(2); + expect(onCompletionMock).toHaveBeenCalledTimes(1); + }); + + it('should handle tool calls', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { + tool_calls: [ + { + function: { name: 'tool1', arguments: '{}' }, + id: 'call_1', + index: 0, + type: 'function', + }, + { + function: { name: 'tool2', arguments: '{}' }, + id: 'call_2', + index: 1, + }, + ], + }, + index: 0, + }, + ], + id: '2', + }); + + controller.close(); + }, + }); + + const onToolCallMock = vi.fn(); + + const protocolStream = OpenAIStream(mockOpenAIStream, { + onToolCall: onToolCallMock, + }); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([ + 'id: 2\n', + 'event: tool_calls\n', + `data: [{"function":{"name":"tool1","arguments":"{}"},"id":"call_1","index":0,"type":"function"},{"function":{"name":"tool2","arguments":"{}"},"id":"call_2","index":1,"type":"function"}]\n\n`, + ]); + + expect(onToolCallMock).toHaveBeenCalledTimes(1); + }); + + it('should handle empty stream', async () => { + const mockStream = new ReadableStream({ + start(controller) { + controller.close(); + }, + }); + + const protocolStream = OpenAIStream(mockStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual([]); + }); + + it('should handle delta content null', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { content: null }, + index: 0, + }, + ], + id: '3', + }); + + controller.close(); + }, + }); + + const protocolStream = OpenAIStream(mockOpenAIStream); + + const decoder = new TextDecoder(); + const chunks = []; + + // @ts-ignore + for await (const chunk of protocolStream) { + chunks.push(decoder.decode(chunk, { stream: true })); + } + + expect(chunks).toEqual(['id: 3\n', 'event: data\n', `data: {"content":null}\n\n`]); + }); + + it('should handle other delta data', async () => { + const mockOpenAIStream = new ReadableStream({ + start(controller) { + controller.enqueue({ + choices: [ + { + delta: { custom_field: 'custom_value' }, + index: 0, + }, + ], + id: '4', + }); + + 
controller.close();
+      },
+    });
+
+    const protocolStream = OpenAIStream(mockOpenAIStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([
+      'id: 4\n',
+      'event: data\n',
+      `data: {"delta":{"custom_field":"custom_value"},"id":"4","index":0}\n\n`,
+    ]);
+  });
+
+  it('should handle tool calls without index and type', async () => {
+    const mockOpenAIStream = new ReadableStream({
+      start(controller) {
+        controller.enqueue({
+          choices: [
+            {
+              delta: {
+                tool_calls: [
+                  {
+                    function: { name: 'tool1', arguments: '{}' },
+                    id: 'call_1',
+                  },
+                  {
+                    function: { name: 'tool2', arguments: '{}' },
+                    id: 'call_2',
+                  },
+                ],
+              },
+              index: 0,
+            },
+          ],
+          id: '5',
+        });
+
+        controller.close();
+      },
+    });
+
+    const protocolStream = OpenAIStream(mockOpenAIStream);
+
+    const decoder = new TextDecoder();
+    const chunks = [];
+
+    // @ts-ignore
+    for await (const chunk of protocolStream) {
+      chunks.push(decoder.decode(chunk, { stream: true }));
+    }
+
+    expect(chunks).toEqual([
+      'id: 5\n',
+      'event: tool_calls\n',
+      `data: [{"function":{"name":"tool1","arguments":"{}"},"id":"call_1","index":0,"type":"function"},{"function":{"name":"tool2","arguments":"{}"},"id":"call_2","index":1,"type":"function"}]\n\n`,
+    ]);
+  });
+});
diff --git a/src/store/tool/slices/builtin/action.test.ts b/src/store/tool/slices/builtin/action.test.ts
new file mode 100644
index 000000000000..b953ec480fb9
--- /dev/null
+++ b/src/store/tool/slices/builtin/action.test.ts
@@ -0,0 +1,90 @@
+import { act, renderHook } from '@testing-library/react';
+import { describe, expect, it, vi } from 'vitest';
+
+import { useToolStore } from '../../store';
+
+vi.mock('zustand/traditional');
+
+describe('createBuiltinToolSlice', () => {
+  describe('invokeBuiltinTool', () => {
+    it('should invoke the tool action and return undefined when it resolves without a result', async () => {
+      // Given
+      const key = 'text2image';
+      const params = {};
+
+      const mockFn = vi.fn();
+      const { result } = renderHook(() => useToolStore());
+
+      act(() => {
+        useToolStore.setState({
+          text2image: mockFn,
+        });
+      });
+
+      await act(async () => {
+        // When
+        const data = await result.current.invokeBuiltinTool(key, params);
+        expect(data).toBeUndefined();
+      });
+
+      // Then
+      expect(mockFn).toHaveBeenCalled();
+    });
+
+    it('should invoke the specified tool action and return the stringified result', async () => {
+      // Given
+      const key = 'text2image';
+
+      const mockFn = vi.fn();
+      const { result } = renderHook(() => useToolStore());
+
+      const params = {
+        prompts: ['test prompt'],
+        size: '512x512',
+        quality: 'standard',
+        style: 'vivid',
+      };
+
+      act(() => {
+        useToolStore.setState({
+          builtinToolLoading: { [key]: false },
+          text2image: mockFn,
+        });
+      });
+      // When
+      await act(async () => {
+        await result.current.invokeBuiltinTool(key, params);
+      });
+
+      expect(mockFn).toBeCalledWith({
+        prompts: ['test prompt'],
+        quality: 'standard',
+        size: '512x512',
+        style: 'vivid',
+      });
+    });
+  });
+
+  describe('text2image', () => {
+    it('should map the prompts to DallEImageItem objects', () => {
+      // When
+      const { result } = renderHook(() => useToolStore());
+
+      const data = result.current.text2image(
+        {
+          prompts: ['prompt1', 'prompt2'],
+          size: '1024x1024',
+          quality: 'standard',
+          style: 'vivid',
+        },
+        'a',
+      );
+
+      // Then
+      expect(data).toEqual([
+        { prompt: 'prompt1', quality: 'standard', size: '1024x1024', style: 'vivid' },
+        { prompt: 'prompt2', quality: 
'standard', size: '1024x1024', style: 'vivid' }, + ]); + }); + }); +}); diff --git a/src/utils/fetch.test.ts b/src/utils/fetch.test.ts index 954c08551987..e70151340d32 100644 --- a/src/utils/fetch.test.ts +++ b/src/utils/fetch.test.ts @@ -1,8 +1,10 @@ +import { fetchEventSource } from '@microsoft/fetch-event-source'; +import { FetchEventSourceInit } from '@microsoft/fetch-event-source'; import { afterEach, describe, expect, it, vi } from 'vitest'; import { ErrorResponse } from '@/types/fetch'; -import { getMessageError, parseToolCalls } from './fetch'; +import { fetchSSE, getMessageError, parseToolCalls } from './fetch'; // 模拟 i18next vi.mock('i18next', () => ({ @@ -39,6 +41,10 @@ const createMockResponse = (body: any, ok: boolean, status: number = 200) => ({ }, }); +vi.mock('@microsoft/fetch-event-source', () => ({ + fetchEventSource: vi.fn(), +})); + // 在每次测试后清理所有模拟 afterEach(() => { vi.restoreAllMocks(); @@ -168,3 +174,150 @@ describe('parseToolCalls', () => { ]); }); }); + +describe('fetchSSE', () => { + it('should handle text event correctly', async () => { + const mockOnMessageHandle = vi.fn(); + const mockOnFinish = vi.fn(); + + (fetchEventSource as any).mockImplementationOnce( + (url: string, options: FetchEventSourceInit) => { + options.onopen!({ clone: () => ({ ok: true, headers: new Headers() }) } as any); + options.onmessage!({ event: 'text', data: JSON.stringify('Hello') } as any); + options.onmessage!({ event: 'text', data: JSON.stringify(' World') } as any); + }, + ); + + await fetchSSE('/', { onMessageHandle: mockOnMessageHandle, onFinish: mockOnFinish }); + + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(1, { text: 'Hello', type: 'text' }); + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(2, { text: ' World', type: 'text' }); + expect(mockOnFinish).toHaveBeenCalledWith('Hello World', { + observationId: null, + toolCalls: undefined, + traceId: null, + type: 'done', + }); + }); + + it('should handle tool_calls event correctly', async () => { + const mockOnMessageHandle = vi.fn(); + const mockOnFinish = vi.fn(); + + (fetchEventSource as any).mockImplementationOnce( + (url: string, options: FetchEventSourceInit) => { + options.onopen!({ clone: () => ({ ok: true, headers: new Headers() }) } as any); + options.onmessage!({ + event: 'tool_calls', + data: JSON.stringify([ + { index: 0, id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }, + ]), + } as any); + options.onmessage!({ + event: 'tool_calls', + data: JSON.stringify([ + { index: 1, id: '2', type: 'function', function: { name: 'func2', arguments: 'arg2' } }, + ]), + } as any); + }, + ); + + await fetchSSE('/', { onMessageHandle: mockOnMessageHandle, onFinish: mockOnFinish }); + + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(1, { + tool_calls: [{ id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }], + type: 'tool_calls', + }); + expect(mockOnMessageHandle).toHaveBeenNthCalledWith(2, { + tool_calls: [ + { id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }, + { id: '2', type: 'function', function: { name: 'func2', arguments: 'arg2' } }, + ], + type: 'tool_calls', + }); + expect(mockOnFinish).toHaveBeenCalledWith('', { + observationId: null, + toolCalls: [ + { id: '1', type: 'function', function: { name: 'func1', arguments: 'arg1' } }, + { id: '2', type: 'function', function: { name: 'func2', arguments: 'arg2' } }, + ], + traceId: null, + type: 'done', + }); + }); + + it('should call onAbort when AbortError is thrown', async () => { + 
const mockOnAbort = vi.fn();
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        options.onmessage!({ event: 'text', data: JSON.stringify('Hello') } as any);
+        options.onerror!({ name: 'AbortError' });
+      },
+    );
+
+    await fetchSSE('/', { onAbort: mockOnAbort });
+
+    expect(mockOnAbort).toHaveBeenCalledWith('Hello');
+  });
+
+  it('should not call onErrorHandle when a non-abort error is thrown', async () => {
+    const mockOnErrorHandle = vi.fn();
+    const mockError = new Error('Unknown error');
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        options.onerror!(mockError);
+      },
+    );
+
+    await fetchSSE('/', { onErrorHandle: mockOnErrorHandle });
+
+    expect(mockOnErrorHandle).not.toHaveBeenCalled();
+  });
+
+  it('should call onErrorHandle when response is not ok', async () => {
+    const mockOnErrorHandle = vi.fn();
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        const res = new Response(JSON.stringify({ errorType: 'SomeError' }), {
+          status: 400,
+          statusText: 'Error',
+        });
+
+        options.onopen!(res as any);
+      },
+    );
+
+    await fetchSSE('/', { onErrorHandle: mockOnErrorHandle });
+
+    expect(mockOnErrorHandle).toHaveBeenCalledWith({
+      body: undefined,
+      message: 'translated_response.SomeError',
+      type: 'SomeError',
+    });
+  });
+
+  it('should call onMessageHandle with full text if no message event', async () => {
+    const mockOnMessageHandle = vi.fn();
+    const mockOnFinish = vi.fn();
+
+    (fetchEventSource as any).mockImplementationOnce(
+      (url: string, options: FetchEventSourceInit) => {
+        const res = new Response('Hello World', { status: 200, statusText: 'OK' });
+        options.onopen!(res as any);
+      },
+    );
+
+    await fetchSSE('/', { onMessageHandle: mockOnMessageHandle, onFinish: mockOnFinish });
+
+    expect(mockOnMessageHandle).toHaveBeenCalledWith({ text: 'Hello World', type: 'text' });
+    expect(mockOnFinish).toHaveBeenCalledWith('Hello World', {
+      observationId: null,
+      toolCalls: undefined,
+      traceId: null,
+      type: 'done',
+    });
+  });
+});
diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts
index d89cae89a9c9..bc2d70aa16ed 100644
--- a/src/utils/fetch.ts
+++ b/src/utils/fetch.ts
@@ -162,7 +162,8 @@ export const fetchSSE = async (url: string, options: RequestInit & FetchSSEOptio
   if (response) {
     // if there is no onMessageHandler, we should call onHandleMessage first
     if (!triggerOnMessageHandler) {
-      options.onMessageHandle?.({ text: await response.clone().text(), type: 'text' });
+      output = await response.clone().text();
+      options.onMessageHandle?.({ text: output, type: 'text' });
     }
 
     const traceId = response.headers.get(LOBE_CHAT_TRACE_ID);

From 5e5add82780e49405814c52abfc3dbc339fedd2f Mon Sep 17 00:00:00 2001
From: arvinxx
Date: Sat, 11 May 2024 22:00:54 +0800
Subject: [PATCH 22/24] =?UTF-8?q?=E2=9C=85=20test:=20add=20test?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/store/chat/slices/plugin/action.test.ts | 345 ++++++++------------
 1 file changed, 138 insertions(+), 207 deletions(-)

diff --git a/src/store/chat/slices/plugin/action.test.ts b/src/store/chat/slices/plugin/action.test.ts
index 627bf70286bb..199c80909127 100644
--- a/src/store/chat/slices/plugin/action.test.ts
+++ b/src/store/chat/slices/plugin/action.test.ts
@@ -2,6 +2,7 @@ import { act, renderHook } from '@testing-library/react';
 import { Md5 } from 'ts-md5';
 import { Mock, afterEach, describe, expect, it, vi } from 
'vitest'; +import { LOADING_FLAT } from '@/const/message'; import { PLUGIN_SCHEMA_API_MD5_PREFIX, PLUGIN_SCHEMA_SEPARATOR } from '@/const/plugin'; import { chatService } from '@/services/chat'; import { messageService } from '@/services/message'; @@ -174,248 +175,178 @@ describe('ChatPluginAction', () => { }); }); - describe.skip('triggerToolCalls', () => { - it('should trigger a function call and update the plugin message accordingly', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ + describe('triggerToolCalls', () => { + it('should trigger tool calls for the assistant message', async () => { + const assistantId = 'assistant-id'; + const message = { + id: assistantId, + role: 'assistant', + content: 'Assistant message', + tools: [ { - id: 'call_sbca', - type: 'function', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName`, - arguments: { key: 'value' }, - }, + id: 'tool1', + type: 'standalone', + identifier: 'plugin1', + apiName: 'api1', + arguments: '{}', }, - ], - }); - const messagePluginPayload = { - apiName: 'apiName', - identifier: 'pluginName', - type: 'default', - arguments: { key: 'value' }, - }; - - const refreshSpy = vi.spyOn(useChatStore.getState(), 'refreshMessages'); - const invokeSpy = vi.spyOn(useChatStore.getState(), 'invokeDefaultTypePlugin'); - - const { result } = renderHook(() => useChatStore()); - - await act(async () => { - useChatStore.setState({ - runPluginApi: vi.fn(), - messages: [ - { - id: messageId, - content: messageContent, - tools: [ - { - id: 'call_sbca', - type: 'default', - identifier: 'pluginName', - apiName: 'apiName', - arguments: "{ key: 'value' }", - }, - ], - } as ChatMessage, - ], - }); - - await result.current.triggerToolCalls(messageId); - }); - - expect(messageService.updateMessage).toHaveBeenCalledWith(messageId, { - content: '', - plugin: messagePluginPayload, - role: 'function', - }); - expect(refreshSpy).toHaveBeenCalled(); - expect(invokeSpy).toHaveBeenCalledWith(messageId, messagePluginPayload); - }); - - it('should handle function call with MD5 prefixed API name', async () => { - const messageId = 'message-id'; - const apiName = 'originalApiName'; - const id = 'pluginIdentifier'; - const md5ApiName = PLUGIN_SCHEMA_API_MD5_PREFIX + Md5.hashStr(apiName).toString(); - const messageContent = JSON.stringify({ - tool_calls: [ { - id: 'call_sbca', - type: 'function', - function: { - name: id + PLUGIN_SCHEMA_SEPARATOR + md5ApiName, - arguments: {}, - }, + id: 'tool2', + type: 'markdown', + identifier: 'plugin2', + apiName: 'api2', + arguments: '{}', + }, + { + id: 'tool3', + type: 'builtin', + identifier: 'builtin1', + apiName: 'api3', + arguments: '{}', }, - ], - }); - - const plugin = { identifier: id, manifest: { api: [{ name: apiName }] } } as LobeTool; - - useToolStore.setState({ installedPlugins: [plugin] }); - - vi.spyOn(chatSelectors, 'getMessageById').mockImplementationOnce( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); - - const { result } = renderHook(() => useChatStore()); - vi.spyOn(result.current, 'invokeDefaultTypePlugin'); - vi.spyOn(result.current, 'refreshMessages'); - - await act(async () => { - await result.current.triggerToolCalls(messageId); - }); - expect(result.current.refreshMessages).toHaveBeenCalled(); - - expect(messageService.updateMessage).toHaveBeenCalledWith( - messageId, - expect.objectContaining({ - // 确保正确的 API 名称被设置 - plugin: expect.objectContaining({ apiName }), - }), - ); - 
expect(result.current.invokeDefaultTypePlugin).toHaveBeenCalledWith( - messageId, - expect.objectContaining({ - apiName: apiName, - }), - ); - }); - - it('should handle standalone plugin type', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ { - id: 'call_scv', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName${PLUGIN_SCHEMA_SEPARATOR}standalone`, - arguments: {}, - }, + id: 'tool4', + type: 'default', + identifier: 'plugin3', + apiName: 'api4', + arguments: '{}', }, ], - }); + } as ChatMessage; - const invokeStandaloneTypePlugin = useChatStore.getState().invokeStandaloneTypePlugin; + const invokeStandaloneTypePluginMock = vi.fn(); + const invokeMarkdownTypePluginMock = vi.fn(); + const invokeBuiltinToolMock = vi.fn(); + const invokeDefaultTypePluginMock = vi.fn().mockResolvedValue('Default tool response'); + const triggerAIMessageMock = vi.fn(); + const internal_createMessageMock = vi.fn().mockResolvedValue('tool-message-id'); + const getTraceIdByMessageIdMock = vi.fn().mockReturnValue('trace-id'); act(() => { useChatStore.setState({ - refreshMessages: vi.fn(), - invokeStandaloneTypePlugin: vi.fn(), + messages: [message], + invokeStandaloneTypePlugin: invokeStandaloneTypePluginMock, + invokeMarkdownTypePlugin: invokeMarkdownTypePluginMock, + invokeBuiltinTool: invokeBuiltinToolMock, + invokeDefaultTypePlugin: invokeDefaultTypePluginMock, + triggerAIMessage: triggerAIMessageMock, + internal_createMessage: internal_createMessageMock, + activeId: 'session-id', + activeTopicId: 'topic-id', }); }); - vi.spyOn(chatSelectors, 'getMessageById').mockImplementation( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); - const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerToolCalls(messageId); - }); - - // 验证 refreshMessages 是否被调用 - expect(result.current.refreshMessages).toHaveBeenCalled(); - - // 验证 invokeDefaultTypePlugin 是否没有被调用,因为类型是 standalone - expect(result.current.invokeDefaultTypePlugin).not.toHaveBeenCalled(); - expect(result.current.invokeStandaloneTypePlugin).toHaveBeenCalled(); + await result.current.triggerToolCalls(assistantId); + }); + + // Verify that tool messages were created for each tool call + expect(internal_createMessageMock).toHaveBeenCalledTimes(4); + expect(internal_createMessageMock).toHaveBeenCalledWith({ + content: LOADING_FLAT, + parentId: assistantId, + plugin: message.tools![0], + role: 'tool', + sessionId: 'session-id', + tool_call_id: 'tool1', + topicId: 'topic-id', + }); + // ... 
similar assertions for other tool calls + + // Verify that the appropriate plugin types were invoked + expect(invokeStandaloneTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![0], + ); + expect(invokeMarkdownTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![1], + ); + expect(invokeBuiltinToolMock).toHaveBeenCalledWith('tool-message-id', message.tools![2]); + expect(invokeDefaultTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![3], + ); - useChatStore.setState({ invokeStandaloneTypePlugin }); + // Verify that AI message was triggered for default type tool call + // expect(getTraceIdByMessageIdMock).toHaveBeenCalledWith('tool-message-id'); + // expect(triggerAIMessageMock).toHaveBeenCalledWith({ traceId: 'trace-id' }); }); - it('should handle builtin plugin type', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ + it('should not trigger AI message if no default type tool calls', async () => { + const assistantId = 'assistant-id'; + const message = { + id: assistantId, + role: 'assistant', + content: 'Assistant message', + tools: [ { - id: 'call_scv', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName${PLUGIN_SCHEMA_SEPARATOR}builtin`, - arguments: {}, - }, + id: 'tool1', + type: 'standalone', + identifier: 'plugin1', + apiName: 'api1', + arguments: '{}', + }, + { + id: 'tool2', + type: 'markdown', + identifier: 'plugin2', + apiName: 'api2', + arguments: '{}', }, - ], - }); - - const invokeBuiltinTool = useChatStore.getState().invokeBuiltinTool; - useChatStore.setState({ refreshMessages: vi.fn(), invokeBuiltinTool: vi.fn() }); - - vi.spyOn(chatSelectors, 'getMessageById').mockImplementation( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); - - const { result } = renderHook(() => useChatStore()); - - await act(async () => { - await result.current.triggerToolCalls(messageId); - }); - - // 验证 refreshMessages 是否被调用 - expect(result.current.refreshMessages).toHaveBeenCalled(); - - // 验证 invokeDefaultTypePlugin 是否没有被调用,因为类型是 standalone - expect(result.current.invokeDefaultTypePlugin).not.toHaveBeenCalled(); - expect(result.current.invokeBuiltinTool).toHaveBeenCalled(); - - useChatStore.setState({ invokeBuiltinTool }); - }); - - it('should handle markdown plugin type', async () => { - const messageId = 'message-id'; - const messageContent = JSON.stringify({ - tool_calls: [ { - id: 'call_scv', - function: { - name: `pluginName${PLUGIN_SCHEMA_SEPARATOR}apiName${PLUGIN_SCHEMA_SEPARATOR}markdown`, - arguments: {}, - }, + id: 'tool3', + type: 'builtin', + identifier: 'builtin1', + apiName: 'api3', + arguments: '{}', }, ], - }); + } as ChatMessage; - const invokeMarkdownTypePlugin = useChatStore.getState().invokeMarkdownTypePlugin; - useChatStore.setState({ - refreshMessages: vi.fn(), - invokeMarkdownTypePlugin: vi.fn(), - }); + const invokeStandaloneTypePluginMock = vi.fn(); + const invokeMarkdownTypePluginMock = vi.fn(); + const invokeBuiltinToolMock = vi.fn(); + const triggerAIMessageMock = vi.fn(); + const internal_createMessageMock = vi.fn().mockResolvedValue('tool-message-id'); - vi.spyOn(chatSelectors, 'getMessageById').mockImplementation( - () => () => - ({ - id: messageId, - content: messageContent, - }) as any, - ); + act(() => { + useChatStore.setState({ + invokeStandaloneTypePlugin: invokeStandaloneTypePluginMock, + invokeMarkdownTypePlugin: invokeMarkdownTypePluginMock, + invokeBuiltinTool: invokeBuiltinToolMock, 
+ triggerAIMessage: triggerAIMessageMock, + internal_createMessage: internal_createMessageMock, + activeId: 'session-id', + messages: [message], + activeTopicId: 'topic-id', + }); + }); const { result } = renderHook(() => useChatStore()); await act(async () => { - await result.current.triggerToolCalls(messageId); + await result.current.triggerToolCalls(assistantId); }); - // 验证 refreshMessages 是否被调用 - expect(result.current.refreshMessages).toHaveBeenCalled(); + // Verify that tool messages were created for each tool call + expect(internal_createMessageMock).toHaveBeenCalledTimes(3); - expect(result.current.invokeDefaultTypePlugin).not.toHaveBeenCalled(); - expect(result.current.invokeMarkdownTypePlugin).toHaveBeenCalled(); + // Verify that the appropriate plugin types were invoked + expect(invokeStandaloneTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![0], + ); + expect(invokeMarkdownTypePluginMock).toHaveBeenCalledWith( + 'tool-message-id', + message.tools![1], + ); + expect(invokeBuiltinToolMock).toHaveBeenCalledWith('tool-message-id', message.tools![2]); - useChatStore.setState({ invokeMarkdownTypePlugin }); + // Verify that AI message was not triggered + expect(triggerAIMessageMock).not.toHaveBeenCalled(); }); }); From 0677a51f507e4ff25aac7cbab491c56f5f000262 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Sat, 11 May 2024 22:47:25 +0800 Subject: [PATCH 23/24] =?UTF-8?q?=F0=9F=8E=A8=20chore:=20improve=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/libs/agent-runtime/google/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index cf4c0689ac9b..708b39e16341 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -271,8 +271,6 @@ export class LobeGoogleAI implements LobeRuntimeAI { }; private convertSchemaObject(schema: JSONSchema7): FunctionDeclarationSchemaProperty { - console.log('input:', schema); - switch (schema.type) { default: case 'object': { From fed708346bdb6d780e13ebf409dcb0dd594eee69 Mon Sep 17 00:00:00 2001 From: arvinxx Date: Sat, 11 May 2024 23:11:03 +0800 Subject: [PATCH 24/24] =?UTF-8?q?=F0=9F=90=9B=20fix:=20fix=20auto=20meta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/features/AgentSetting/store/action.ts | 21 +++++++++++++------ src/store/chat/slices/enchance/action.ts | 25 ++++++++++++++--------- src/utils/fetch.ts | 2 +- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/features/AgentSetting/store/action.ts b/src/features/AgentSetting/store/action.ts index 5176fe661253..ad0fb7ca6bde 100644 --- a/src/features/AgentSetting/store/action.ts +++ b/src/features/AgentSetting/store/action.ts @@ -8,6 +8,7 @@ import { TraceNameMap, TracePayload, TraceTopicType } from '@/const/trace'; import { chatService } from '@/services/chat'; import { LobeAgentConfig } from '@/types/agent'; import { MetaData } from '@/types/meta'; +import { MessageTextChunk } from '@/utils/fetch'; import { setNamespace } from '@/utils/storeDebug'; import { SessionLoadingState } from '../store/initialState'; @@ -246,17 +247,25 @@ export const store: StateCreator = (set, g streamUpdateMetaArray: (key: keyof MetaData) => { let value = ''; - return (text: string) => { - value += text; - get().dispatchMeta({ type: 'update', value: { [key]: value.split(',') } }); + return (chunk: MessageTextChunk) => { + switch (chunk.type) { 
+          case 'text': {
+            value += chunk.text;
+            get().dispatchMeta({ type: 'update', value: { [key]: value.split(',') } });
+          }
+        }
       };
     },
 
     streamUpdateMetaString: (key: keyof MetaData) => {
       let value = '';
 
-      return (text: string) => {
-        value += text;
-        get().dispatchMeta({ type: 'update', value: { [key]: value } });
+      return (chunk: MessageTextChunk) => {
+        switch (chunk.type) {
+          case 'text': {
+            value += chunk.text;
+            get().dispatchMeta({ type: 'update', value: { [key]: value } });
+          }
+        }
       };
     },
 
diff --git a/src/store/chat/slices/enchance/action.ts b/src/store/chat/slices/enchance/action.ts
index 778fe5246b95..b28f0a69654e 100644
--- a/src/store/chat/slices/enchance/action.ts
+++ b/src/store/chat/slices/enchance/action.ts
@@ -76,16 +76,21 @@ export const chatEnhance: StateCreator<
 
     // translate to target language
     await chatService.fetchPresetTaskResult({
-      onMessageHandle: (text) => {
-        internal_dispatchMessage({
-          id,
-          key: 'translate',
-          type: 'updateMessageExtra',
-          value: produce({ content: '', from, to: targetLang }, (draft) => {
-            content += text;
-            draft.content += content;
-          }),
-        });
+      onMessageHandle: (chunk) => {
+        switch (chunk.type) {
+          case 'text': {
+            internal_dispatchMessage({
+              id,
+              key: 'translate',
+              type: 'updateMessageExtra',
+              value: produce({ content: '', from, to: targetLang }, (draft) => {
+                content += chunk.text;
+                draft.content += content;
+              }),
+            });
+            break;
+          }
+        }
       },
       params: chainTranslate(message.content, targetLang),
       trace: get().getCurrentTracePayload({ traceName: TraceNameMap.Translator }),
diff --git a/src/utils/fetch.ts b/src/utils/fetch.ts
index bc2d70aa16ed..748cf044fe86 100644
--- a/src/utils/fetch.ts
+++ b/src/utils/fetch.ts
@@ -45,7 +45,7 @@ export type OnFinishHandler = (
   },
 ) => Promise<void>;
 
-interface MessageTextChunk {
+export interface MessageTextChunk {
   text: string;
   type: 'text';
 }
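
For reference, the stream tests above all assert one wire format: each protocol chunk is flushed as an `id:` line, an `event:` line, and a JSON-encoded `data:` line, followed by a blank line. A minimal TypeScript sketch of that serialization (the chunk shape and the transformer name below are assumptions inferred from the expected test output, not the patch's actual exports):

// Illustrative sketch only: serialize protocol chunks into the SSE frames the tests decode.
interface StreamProtocolChunk {
  data: unknown; // JSON-serializable payload, e.g. 'Hello' or a tool_calls array
  id: string; // stable per-response id, e.g. 'chat_1'
  type: 'text' | 'tool_calls' | 'stop' | 'data';
}

const createSSEProtocolTransformer = () => {
  const encoder = new TextEncoder();

  return new TransformStream<StreamProtocolChunk, Uint8Array>({
    transform(chunk, controller) {
      // Every chunk becomes three SSE lines terminated by a blank line, matching
      // the exact strings the tests collect and compare.
      controller.enqueue(encoder.encode(`id: ${chunk.id}\n`));
      controller.enqueue(encoder.encode(`event: ${chunk.type}\n`));
      controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk.data)}\n\n`));
    },
  });
};

Piping provider chunks mapped into this shape through such a transformer yields exactly the 'id: chat_1\n' / 'event: text\n' / 'data: "Hello"\n\n' sequences asserted throughout, which is what lets every provider share the same fetchSSE consumer on the client.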