diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 097c9440be2d9..a916d9842e026 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/app.d.ts b/tools/server/webui/src/app.d.ts index eb14d6fe45143..a362c24b0f9b7 100644 --- a/tools/server/webui/src/app.d.ts +++ b/tools/server/webui/src/app.d.ts @@ -12,9 +12,20 @@ import type { ApiContextSizeError, ApiErrorResponse, ApiLlamaCppServerProps, - ApiProcessingState + ApiProcessingState, + ApiRouterModelMeta, + ApiRouterModelsLoadRequest, + ApiRouterModelsLoadResponse, + ApiRouterModelsStatusRequest, + ApiRouterModelsStatusResponse, + ApiRouterModelsListResponse, + ApiRouterModelsUnloadRequest, + ApiRouterModelsUnloadResponse } from '$lib/types/api'; +import { ServerMode, ServerModelStatus } from '$lib/enums/server'; +import { ModelModality } from '$lib/enums/model'; + import type { ChatMessageType, ChatRole, @@ -60,6 +71,14 @@ declare global { ApiErrorResponse, ApiLlamaCppServerProps, ApiProcessingState, + ApiRouterModelMeta, + ApiRouterModelsLoadRequest, + ApiRouterModelsLoadResponse, + ApiRouterModelsStatusRequest, + ApiRouterModelsStatusResponse, + ApiRouterModelsListResponse, + ApiRouterModelsUnloadRequest, + ApiRouterModelsUnloadResponse, ChatMessageData, ChatMessagePromptProgress, ChatMessageSiblingInfo, @@ -75,6 +94,9 @@ declare global { DatabaseMessageExtraTextFile, DatabaseMessageExtraPdfFile, DatabaseMessageExtraLegacyContext, + ModelModality, + ServerMode, + ServerModelStatus, SettingsConfigValue, SettingsFieldConfig, SettingsConfigType, diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte index 212b1fe890a11..015f6fc3d7bc5 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte +++ 
b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte @@ -1,8 +1,11 @@ -
+
- {#if currentConfig.modelSelectorEnabled} - - {/if} + {#if isLoading} {:else} - + {#if shouldShowRecordButton} + + {/if} - + {#if shouldShowSubmitButton} + + {/if} {/if}
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte index 865d81ba44813..7342e979decf6 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte @@ -1,27 +1,18 @@ - +

llama.cpp

@@ -154,8 +154,6 @@ - -
import * as Dialog from '$lib/components/ui/dialog'; + import { ModelModality } from '$lib/enums/model'; + import { AttachmentType } from '$lib/enums/attachment'; + import type { DatabaseMessageExtra } from '$lib/types/database'; import { ChatAttachmentPreview } from '$lib/components/app'; import { formatFileSize } from '$lib/utils/file-preview'; interface Props { open: boolean; + onOpenChange?: (open: boolean) => void; // Either an uploaded file or a stored attachment uploadedFile?: ChatUploadedFile; attachment?: DatabaseMessageExtra; @@ -18,6 +22,7 @@ let { open = $bindable(), + onOpenChange, uploadedFile, attachment, preview, @@ -32,16 +37,17 @@ let displayName = $derived(uploadedFile?.name || attachment?.name || name || 'Unknown File'); let displayType = $derived( - uploadedFile?.type || - (attachment?.type === 'imageFile' + uploadedFile + ? uploadedFile.type + : attachment?.type === AttachmentType.IMAGE ? 'image' - : attachment?.type === 'textFile' + : attachment?.type === AttachmentType.TEXT ? 'text' - : attachment?.type === 'audioFile' - ? attachment.mimeType || 'audio' - : attachment?.type === 'pdfFile' + : attachment?.type === AttachmentType.AUDIO + ? attachment.mimeType || ModelModality.AUDIO + : attachment?.type === AttachmentType.PDF ? 'application/pdf' - : type || 'unknown') + : type || 'unknown' ); let displaySize = $derived(uploadedFile?.size || size); @@ -53,7 +59,7 @@ }); - + {displayName} diff --git a/tools/server/webui/src/lib/components/app/dialogs/DialogModelInformation.svelte b/tools/server/webui/src/lib/components/app/dialogs/DialogModelInformation.svelte new file mode 100644 index 0000000000000..c8ace168bb9dc --- /dev/null +++ b/tools/server/webui/src/lib/components/app/dialogs/DialogModelInformation.svelte @@ -0,0 +1,245 @@ + + + + + + + + Model Information + Current model details and capabilities + + +
+ {#if isLoadingModels} +
+
Loading model information...
+
+ {:else if modelsData && modelsData.data.length > 0} + {@const modelMeta = modelsData.data[0].meta} + + {#if serverProps} + + + + Model + + + + {serverStore.modelName} + + + + + + + + File Path + + + + {serverProps.model_path} + + + copyToClipboard(serverProps.model_path)} + /> + + + + + + Context Size + {formatNumber(serverProps.default_generation_settings.n_ctx)} tokens + + + + {#if modelMeta?.n_ctx_train} + + Training Context + {formatNumber(modelMeta.n_ctx_train)} tokens + + {/if} + + + {#if modelMeta?.size} + + Model Size + {formatSize(modelMeta.size)} + + {/if} + + + {#if modelMeta?.n_params} + + Parameters + {formatParameters(modelMeta.n_params)} + + {/if} + + + {#if modelMeta?.n_embd} + + Embedding Size + {formatNumber(modelMeta.n_embd)} + + {/if} + + + {#if modelMeta?.n_vocab} + + Vocabulary Size + {formatNumber(modelMeta.n_vocab)} tokens + + {/if} + + + {#if modelMeta?.vocab_type} + + Vocabulary Type + {modelMeta.vocab_type} + + {/if} + + + + Parallel Slots + {serverProps.total_slots} + + + + {#if modalities.length > 0} + + Modalities + +
+ +
+
+
+ {/if} + + + + Build Info + {serverProps.build_info} + + + + {#if serverProps.chat_template} + + Chat Template + +
+
{serverProps.chat_template}
+
+
+
+ {/if} +
+
+ {/if} + {:else if !isLoadingModels} +
+
No model information available
+
+ {/if} +
+
+
diff --git a/tools/server/webui/src/lib/components/app/index.ts b/tools/server/webui/src/lib/components/app/index.ts index 54bd8d5aa355f..b8f0fa1db0943 100644 --- a/tools/server/webui/src/lib/components/app/index.ts +++ b/tools/server/webui/src/lib/components/app/index.ts @@ -12,7 +12,6 @@ export { default as ChatFormActionRecord } from './chat/ChatForm/ChatFormActions export { default as ChatFormActions } from './chat/ChatForm/ChatFormActions/ChatFormActions.svelte'; export { default as ChatFormFileInputInvisible } from './chat/ChatForm/ChatFormFileInputInvisible.svelte'; export { default as ChatFormHelperText } from './chat/ChatForm/ChatFormHelperText.svelte'; -export { default as ChatFormModelSelector } from './chat/ChatForm/ChatFormModelSelector.svelte'; export { default as ChatFormTextarea } from './chat/ChatForm/ChatFormTextarea.svelte'; export { default as ChatMessage } from './chat/ChatMessages/ChatMessage.svelte'; @@ -45,19 +44,22 @@ export { default as DialogConfirmation } from './dialogs/DialogConfirmation.svel export { default as DialogConversationSelection } from './dialogs/DialogConversationSelection.svelte'; export { default as DialogConversationTitleUpdate } from './dialogs/DialogConversationTitleUpdate.svelte'; export { default as DialogEmptyFileAlert } from './dialogs/DialogEmptyFileAlert.svelte'; +export { default as DialogModelInformation } from './dialogs/DialogModelInformation.svelte'; // Miscellanous export { default as ActionButton } from './misc/ActionButton.svelte'; export { default as ActionDropdown } from './misc/ActionDropdown.svelte'; +export { default as BadgeModelName } from './misc/BadgeModelName.svelte'; +export { default as BadgeModality } from './misc/BadgeModality.svelte'; export { default as ConversationSelection } from './misc/ConversationSelection.svelte'; export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte'; export { default as MarkdownContent } from './misc/MarkdownContent.svelte'; export { 
default as RemoveButton } from './misc/RemoveButton.svelte'; +export { default as SelectorModel } from './misc/SelectorModel.svelte'; // Server export { default as ServerStatus } from './server/ServerStatus.svelte'; export { default as ServerErrorSplash } from './server/ServerErrorSplash.svelte'; export { default as ServerLoadingSplash } from './server/ServerLoadingSplash.svelte'; -export { default as ServerInfo } from './server/ServerInfo.svelte'; diff --git a/tools/server/webui/src/lib/components/app/misc/BadgeModality.svelte b/tools/server/webui/src/lib/components/app/misc/BadgeModality.svelte new file mode 100644 index 0000000000000..b9413cb8531e4 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/BadgeModality.svelte @@ -0,0 +1,51 @@ + + +{#each modalities as modality, index (index)} + {@const IconComponent = getModalityIcon(modality)} + + + {#if IconComponent} + + {/if} + + {getModalityLabel(modality)} + +{/each} diff --git a/tools/server/webui/src/lib/components/app/misc/BadgeModelName.svelte b/tools/server/webui/src/lib/components/app/misc/BadgeModelName.svelte new file mode 100644 index 0000000000000..babdcd90b0cfd --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/BadgeModelName.svelte @@ -0,0 +1,53 @@ + + +{#snippet badge()} + +
+ +
+ + {model} +
+{/snippet} + +{#if model && isModelMode} + {#if showTooltip} + + + {@render badge()} + + + + {onclick ? 'Click for model details' : 'Model name'} + + + {:else} + {@render badge()} + {/if} +{/if} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte b/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte similarity index 66% rename from tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte rename to tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte index f07eb7a4395a4..22d487c9e5cf1 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormModelSelector.svelte +++ b/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte @@ -1,36 +1,46 @@ - -
- {#if loading && options.length === 0 && !isMounted} +
+ {#if loading && options.length === 0 && isRouter}
- + Loading models…
- {:else if options.length === 0} + {:else if options.length === 0 && isRouter}

No models available.

{:else} {@const selectedOption = getDisplayOption()} -
+
- {#if isOpen} + {#if isOpen && isRouter}
handleOptionSelect(option.id)} + aria-selected={currentModel === option.model || activeId === option.id} + onclick={() => handleSelect(option.id)} > - - {option.name} - - - {#if option.description} - {option.description} - {/if} + {option.name} {/each}
@@ -345,8 +326,8 @@ {/if}
{/if} - - {#if error} -

{error}

- {/if}
+ +{#if showModelDialog && !isRouter} + +{/if} diff --git a/tools/server/webui/src/lib/components/app/server/ServerInfo.svelte b/tools/server/webui/src/lib/components/app/server/ServerInfo.svelte deleted file mode 100644 index 9a43e333c4902..0000000000000 --- a/tools/server/webui/src/lib/components/app/server/ServerInfo.svelte +++ /dev/null @@ -1,43 +0,0 @@ - - -{#if props} -
- {#if model} - - - - {model} - - {/if} - -
- {#if props.default_generation_settings.n_ctx} - - ctx: {props.default_generation_settings.n_ctx.toLocaleString()} - - {/if} - - {#if modalities.length > 0} - {#each modalities as modality (modality)} - - {#if modality === 'vision'} - - {:else if modality === 'audio'} - - {/if} - - {modality} - - {/each} - {/if} -
-
-{/if} diff --git a/tools/server/webui/src/lib/components/ui/table/index.ts b/tools/server/webui/src/lib/components/ui/table/index.ts new file mode 100644 index 0000000000000..99239aeead53e --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/index.ts @@ -0,0 +1,28 @@ +import Root from './table.svelte'; +import Body from './table-body.svelte'; +import Caption from './table-caption.svelte'; +import Cell from './table-cell.svelte'; +import Footer from './table-footer.svelte'; +import Head from './table-head.svelte'; +import Header from './table-header.svelte'; +import Row from './table-row.svelte'; + +export { + Root, + Body, + Caption, + Cell, + Footer, + Head, + Header, + Row, + // + Root as Table, + Body as TableBody, + Caption as TableCaption, + Cell as TableCell, + Footer as TableFooter, + Head as TableHead, + Header as TableHeader, + Row as TableRow +}; diff --git a/tools/server/webui/src/lib/components/ui/table/table-body.svelte b/tools/server/webui/src/lib/components/ui/table/table-body.svelte new file mode 100644 index 0000000000000..f8df65cf689b3 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-body.svelte @@ -0,0 +1,20 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-caption.svelte b/tools/server/webui/src/lib/components/ui/table/table-caption.svelte new file mode 100644 index 0000000000000..0fdcc6439c1b3 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-caption.svelte @@ -0,0 +1,20 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-cell.svelte b/tools/server/webui/src/lib/components/ui/table/table-cell.svelte new file mode 100644 index 0000000000000..4506fdfc5bc3c --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-cell.svelte @@ -0,0 +1,23 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-footer.svelte 
b/tools/server/webui/src/lib/components/ui/table/table-footer.svelte new file mode 100644 index 0000000000000..77e4a64c08b23 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-footer.svelte @@ -0,0 +1,20 @@ + + +tr]:last:border-b-0', className)} + {...restProps} +> + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-head.svelte b/tools/server/webui/src/lib/components/ui/table/table-head.svelte new file mode 100644 index 0000000000000..c1c57ad443495 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-head.svelte @@ -0,0 +1,23 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-header.svelte b/tools/server/webui/src/lib/components/ui/table/table-header.svelte new file mode 100644 index 0000000000000..eb366739b39e3 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-header.svelte @@ -0,0 +1,20 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-row.svelte b/tools/server/webui/src/lib/components/ui/table/table-row.svelte new file mode 100644 index 0000000000000..4131d3660a4c7 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-row.svelte @@ -0,0 +1,23 @@ + + +svelte-css-wrapper]:[&>th,td]:bg-muted/50', + className + )} + {...restProps} +> + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table.svelte b/tools/server/webui/src/lib/components/ui/table/table.svelte new file mode 100644 index 0000000000000..c11a6a6c4ba62 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table.svelte @@ -0,0 +1,22 @@ + + +
+ + {@render children?.()} +
+
diff --git a/tools/server/webui/src/lib/enums/attachment.ts b/tools/server/webui/src/lib/enums/attachment.ts new file mode 100644 index 0000000000000..7c7d0da994699 --- /dev/null +++ b/tools/server/webui/src/lib/enums/attachment.ts @@ -0,0 +1,10 @@ +/** + * Attachment type enum for database message extras (values equal the `type` strings already persisted in stored messages) + */ +export enum AttachmentType { + AUDIO = 'audioFile', + IMAGE = 'imageFile', + PDF = 'pdfFile', + TEXT = 'textFile', + LEGACY_CONTEXT = 'context' // Legacy attachment type for backward compatibility +} diff --git a/tools/server/webui/src/lib/enums/model.ts b/tools/server/webui/src/lib/enums/model.ts new file mode 100644 index 0000000000000..7729ecfeabb03 --- /dev/null +++ b/tools/server/webui/src/lib/enums/model.ts @@ -0,0 +1,5 @@ +export enum ModelModality { + TEXT = 'TEXT', + AUDIO = 'AUDIO', + VISION = 'VISION' +} diff --git a/tools/server/webui/src/lib/enums/server.ts b/tools/server/webui/src/lib/enums/server.ts new file mode 100644 index 0000000000000..f2d893537d34a --- /dev/null +++ b/tools/server/webui/src/lib/enums/server.ts @@ -0,0 +1,19 @@ +/** + * Server mode enum - used for single/multi-model mode + */ +export enum ServerMode { + /** Single model mode - server running with a specific model loaded */ + MODEL = 'MODEL', + /** Router mode - server managing multiple model instances */ + ROUTER = 'ROUTER' +} + +/** + * Model status enum - matches tools/server/server-models.h from C++ server + */ +export enum ServerModelStatus { + UNLOADED = 'UNLOADED', + LOADING = 'LOADING', + LOADED = 'LOADED', + FAILED = 'FAILED' +} diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index aa83910b27f53..a0fc4a0f5d1bc 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -7,8 +7,10 @@ import type { ApiChatCompletionStreamChunk, ApiChatCompletionToolCall, ApiChatCompletionToolCallDelta, - ApiChatMessageData + ApiChatMessageData, + ApiModelListResponse } from '$lib/types/api';
+import { AttachmentType } from '$lib/enums/attachment'; import type { DatabaseMessage, DatabaseMessageExtra, @@ -74,7 +76,6 @@ export class ChatService { onReasoningChunk, onToolCallChunk, onModel, - onFirstValidChunk, // Generation parameters temperature, max_tokens, @@ -223,7 +224,6 @@ export class ChatService { onReasoningChunk, onToolCallChunk, onModel, - onFirstValidChunk, conversationId, abortController.signal ); @@ -298,7 +298,6 @@ export class ChatService { onReasoningChunk?: (chunk: string) => void, onToolCallChunk?: (chunk: string) => void, onModel?: (model: string) => void, - onFirstValidChunk?: () => void, conversationId?: string, abortSignal?: AbortSignal ): Promise { @@ -315,7 +314,6 @@ export class ChatService { let lastTimings: ChatMessageTimings | undefined; let streamFinished = false; let modelEmitted = false; - let firstValidChunkEmitted = false; let toolCallIndexOffset = 0; let hasOpenToolCallBatch = false; @@ -382,15 +380,6 @@ export class ChatService { try { const parsed: ApiChatCompletionStreamChunk = JSON.parse(data); - - if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') { - firstValidChunkEmitted = true; - - if (!abortSignal?.aborted) { - onFirstValidChunk?.(); - } - } - const content = parsed.choices[0]?.delta?.content; const reasoningContent = parsed.choices[0]?.delta?.reasoning_content; const toolCalls = parsed.choices[0]?.delta?.tool_calls; @@ -618,7 +607,7 @@ export class ChatService { const imageFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile => - extra.type === 'imageFile' + extra.type === AttachmentType.IMAGE ); for (const image of imageFiles) { @@ -630,7 +619,7 @@ export class ChatService { const textFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile => - extra.type === 'textFile' + extra.type === AttachmentType.TEXT ); for (const textFile of textFiles) { @@ -643,7 +632,7 @@ export class ChatService { // 
Handle legacy 'context' type from old webui (pasted content) const legacyContextFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraLegacyContext => - extra.type === 'context' + extra.type === AttachmentType.LEGACY_CONTEXT ); for (const legacyContextFile of legacyContextFiles) { @@ -655,7 +644,7 @@ export class ChatService { const audioFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile => - extra.type === 'audioFile' + extra.type === AttachmentType.AUDIO ); for (const audio of audioFiles) { @@ -670,7 +659,7 @@ export class ChatService { const pdfFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile => - extra.type === 'pdfFile' + extra.type === AttachmentType.PDF ); for (const pdfFile of pdfFiles) { @@ -722,6 +711,33 @@ export class ChatService { } } + /** + * Fetches the model list from the ./models endpoint (Bearer API key sent when configured); logs and rethrows on failure or non-OK status + */ + static async getModels(): Promise<ApiModelListResponse> { + try { + const currentConfig = config(); + const apiKey = currentConfig.apiKey?.toString().trim(); + + const response = await fetch(`./models`, { + headers: { + 'Content-Type': 'application/json', + ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) + } + }); + + if (!response.ok) { + throw new Error(`Failed to fetch models: ${response.status} ${response.statusText}`); + } + + const data = await response.json(); + return data; + } catch (error) { + console.error('Error fetching models:', error); + throw error; + } + } + /** * Aborts any ongoing chat completion request. * Cancels the current request and cleans up the abort controller.
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index c70b9580cb75b..028ada3287194 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -1,7 +1,6 @@ import { DatabaseStore } from '$lib/stores/database'; import { chatService, slotsService } from '$lib/services'; import { config } from '$lib/stores/settings.svelte'; -import { serverStore } from '$lib/stores/server.svelte'; import { normalizeModelName } from '$lib/utils/model-names'; import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching'; import { browser } from '$app/environment'; @@ -365,41 +364,15 @@ class ChatStore { let resolvedModel: string | null = null; let modelPersisted = false; - const currentConfig = config(); - const preferServerPropsModel = !currentConfig.modelSelectorEnabled; - let serverPropsRefreshed = false; - let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null; - - const refreshServerPropsOnce = () => { - if (serverPropsRefreshed) { - return; - } - - serverPropsRefreshed = true; - - const hasExistingProps = serverStore.serverProps !== null; - - serverStore - .fetchServerProps({ silent: hasExistingProps }) - .then(() => { - updateModelFromServerProps?.(true); - }) - .catch((error) => { - console.warn('Failed to refresh server props after streaming started:', error); - }); - }; const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => { - const serverModelName = serverStore.modelName; - const preferredModelSource = preferServerPropsModel - ? (serverModelName ?? modelName ?? null) - : (modelName ?? serverModelName ?? 
null); - - if (!preferredModelSource) { + if (!modelName) { return; } - const normalizedModel = normalizeModelName(preferredModelSource); + const normalizedModel = normalizeModelName(modelName); + + console.log('Resolved model:', normalizedModel); if (!normalizedModel || normalizedModel === resolvedModel) { return; @@ -423,20 +396,6 @@ class ChatStore { } }; - if (preferServerPropsModel) { - updateModelFromServerProps = (persistImmediately = true) => { - const currentServerModel = serverStore.modelName; - - if (!currentServerModel) { - return; - } - - recordModel(currentServerModel, persistImmediately); - }; - - updateModelFromServerProps(false); - } - slotsService.startStreaming(); slotsService.setActiveConversation(assistantMessage.convId); @@ -445,9 +404,6 @@ class ChatStore { { ...this.getApiOptions(), - onFirstValidChunk: () => { - refreshServerPropsOnce(); - }, onChunk: (chunk: string) => { streamedContent += chunk; this.setConversationStreaming( diff --git a/tools/server/webui/src/lib/stores/server.svelte.ts b/tools/server/webui/src/lib/stores/server.svelte.ts index e95c0bcea2f9e..b8d47f295f1cb 100644 --- a/tools/server/webui/src/lib/stores/server.svelte.ts +++ b/tools/server/webui/src/lib/stores/server.svelte.ts @@ -2,6 +2,9 @@ import { browser } from '$app/environment'; import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; import { ChatService } from '$lib/services/chat'; import { config } from '$lib/stores/settings.svelte'; +import { ServerMode } from '$lib/enums/server'; +import { ModelModality } from '$lib/enums/model'; +import { updateConfig } from '$lib/stores/settings.svelte'; /** * ServerStore - Server state management and capability detection @@ -52,6 +55,10 @@ class ServerStore { private _error = $state(null); private _serverWarning = $state(null); private _slotsEndpointAvailable = $state(null); + private _serverMode = $state(null); + private _selectedModel = $state(null); + private _availableModels = $state([]); + 
private _modelLoadingStates = $state>(new Map()); private fetchServerPropsPromise: Promise | null = null; private readCachedServerProps(): ApiLlamaCppServerProps | null { @@ -106,13 +113,13 @@ class ServerStore { return this._serverProps.model_path.split(/(\\|\/)/).pop() || null; } - get supportedModalities(): string[] { - const modalities: string[] = []; + get supportedModalities(): ModelModality[] { + const modalities: ModelModality[] = []; if (this._serverProps?.modalities?.audio) { - modalities.push('audio'); + modalities.push(ModelModality.AUDIO); } if (this._serverProps?.modalities?.vision) { - modalities.push('vision'); + modalities.push(ModelModality.VISION); } return modalities; } @@ -135,6 +142,48 @@ class ServerStore { return this._serverProps?.default_generation_settings?.params || null; } + /** + * Get current server mode + */ + get serverMode(): ServerMode | null { + return this._serverMode; + } + + /** + * Detect if server is running in router mode (multi-model management) + */ + get isRouterMode(): boolean { + return this._serverMode === ServerMode.ROUTER; + } + + /** + * Detect if server is running in model mode (single model loaded) + */ + get isModelMode(): boolean { + return this._serverMode === ServerMode.MODEL; + } + + /** + * Get currently selected model in router mode + */ + get selectedModel(): string | null { + return this._selectedModel; + } + + /** + * Get list of available models + */ + get availableModels(): ApiRouterModelMeta[] { + return this._availableModels; + } + + /** + * Check if a specific model is currently loading + */ + isModelLoading(modelName: string): boolean { + return this._modelLoadingStates.get(modelName) ?? 
false; + } + /** * Check if slots endpoint is available based on server properties and endpoint support */ @@ -198,6 +247,21 @@ class ServerStore { this.persistServerProps(props); this._error = null; this._serverWarning = null; + + // Detect server mode based on model_path + if (props.model_path === 'none') { + this._serverMode = ServerMode.ROUTER; + console.info('Server running in ROUTER mode (multi-model management)'); + + // Auto-enable model selector in router mode + if (browser) { + updateConfig('modelSelectorEnabled', true); + } + } else { + this._serverMode = ServerMode.MODEL; + console.info('Server running in MODEL mode (single model)'); + } + await this.checkSlotsEndpointAvailability(); } catch (error) { if (isSilent && hadProps) { @@ -312,6 +376,10 @@ class ServerStore { this._serverWarning = null; this._loading = false; this._slotsEndpointAvailable = null; + this._serverMode = null; + this._selectedModel = null; + this._availableModels = []; + this._modelLoadingStates.clear(); this.fetchServerPropsPromise = null; this.persistServerProps(null); } @@ -329,3 +397,10 @@ export const supportsVision = () => serverStore.supportsVision; export const supportsAudio = () => serverStore.supportsAudio; export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable; export const serverDefaultParams = () => serverStore.serverDefaultParams; + +// Server mode exports +export const serverMode = () => serverStore.serverMode; +export const isRouterMode = () => serverStore.isRouterMode; +export const isModelMode = () => serverStore.isModelMode; +export const selectedModel = () => serverStore.selectedModel; +export const availableModels = () => serverStore.availableModels; diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index 1a8bc64989957..5956780405576 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -1,3 +1,4 @@ +import type { ServerModelStatus } from 
'$lib/enums/server'; import type { ChatMessagePromptProgress } from './chat'; export interface ApiChatMessageContentPart { @@ -314,3 +315,74 @@ export interface ApiProcessingState { promptTokens?: number; cacheTokens?: number; } + +export interface ApiRouterModelMeta { + /** Model identifier (e.g., "unsloth/phi-4-GGUF:q4_k_m") */ + name: string; + /** Path to model file or manifest */ + path: string; + /** Optional path to multimodal projector */ + path_mmproj?: string; + /** Whether model is in HuggingFace cache */ + in_cache: boolean; + /** Port where model instance is running (0 if not loaded) */ + port: number; + /** Current status of the model */ + status: ServerModelStatus; + /** Error message if status is FAILED */ + error?: string; +} + +/** + * Request to load a model + */ +export interface ApiRouterModelsLoadRequest { + model: string; +} + +/** + * Response from loading a model + */ +export interface ApiRouterModelsLoadResponse { + success: boolean; + error?: string; +} + +/** + * Request to check model status + */ +export interface ApiRouterModelsStatusRequest { + model: string; +} + +/** + * Response with model status + */ +export interface ApiRouterModelsStatusResponse { + model: string; + status: ServerModelStatus; + port?: number; + error?: string; +} + +/** + * Response with list of all models + */ +export interface ApiRouterModelsListResponse { + models: ApiRouterModelMeta[]; +} + +/** + * Request to unload a model + */ +export interface ApiRouterModelsUnloadRequest { + model: string; +} + +/** + * Response from unloading a model + */ +export interface ApiRouterModelsUnloadResponse { + success: boolean; + error?: string; +} diff --git a/tools/server/webui/src/lib/types/database.d.ts b/tools/server/webui/src/lib/types/database.d.ts index 16debc6d67d58..9a5d9204de1b3 100644 --- a/tools/server/webui/src/lib/types/database.d.ts +++ b/tools/server/webui/src/lib/types/database.d.ts @@ -1,4 +1,5 @@ -import type { ChatMessageTimings } from './chat'; +import type
{ ChatMessageTimings, ChatRole, ChatMessageType } from '$lib/types/chat'; +import { AttachmentType } from '$lib/enums/attachment'; export interface DatabaseConversation { currNode: string | null; @@ -8,38 +9,39 @@ export interface DatabaseConversation { } export interface DatabaseMessageExtraAudioFile { - type: 'audioFile'; + type: AttachmentType.AUDIO; name: string; base64Data: string; mimeType: string; } export interface DatabaseMessageExtraImageFile { - type: 'imageFile'; + type: AttachmentType.IMAGE; name: string; base64Url: string; } -export interface DatabaseMessageExtraTextFile { - type: 'textFile'; +/** + * Legacy format from old webui - pasted content was stored as "context" type + * @deprecated Use DatabaseMessageExtraTextFile instead + */ +export interface DatabaseMessageExtraLegacyContext { + type: AttachmentType.LEGACY_CONTEXT; name: string; content: string; } export interface DatabaseMessageExtraPdfFile { - type: 'pdfFile'; + type: AttachmentType.PDF; + base64Data: string; name: string; content: string; // Text content extracted from PDF images?: string[]; // Optional: PDF pages as base64 images processedAsImages: boolean; // Whether PDF was processed as images } -/** - * Legacy format from old webui - pasted content was stored as "context" type - * @deprecated Use DatabaseMessageExtraTextFile instead - */ -export interface DatabaseMessageExtraLegacyContext { - type: 'context'; +export interface DatabaseMessageExtraTextFile { + type: AttachmentType.TEXT; name: string; content: string; } diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index b47842b66e619..24be6053e27e1 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -45,7 +45,6 @@ export interface SettingsChatServiceOptions { onReasoningChunk?: (chunk: string) => void; onToolCallChunk?: (chunk: string) => void; onModel?: (model: string) => void; - onFirstValidChunk?: () => 
void; onComplete?: ( response: string, reasoningContent?: string, diff --git a/tools/server/webui/src/lib/utils/convert-files-to-extra.ts b/tools/server/webui/src/lib/utils/convert-files-to-extra.ts index 70c6f772d997a..8229d2b0bd11e 100644 --- a/tools/server/webui/src/lib/utils/convert-files-to-extra.ts +++ b/tools/server/webui/src/lib/utils/convert-files-to-extra.ts @@ -2,6 +2,7 @@ import { convertPDFToImage, convertPDFToText } from './pdf-processing'; import { isSvgMimeType, svgBase64UrlToPngDataURL } from './svg-to-png'; import { isWebpMimeType, webpBase64UrlToPngDataURL } from './webp-to-png'; import { FileTypeCategory } from '$lib/enums/files'; +import { AttachmentType } from '$lib/enums/attachment'; import { config, settingsStore } from '$lib/stores/settings.svelte'; import { supportsVision } from '$lib/stores/server.svelte'; import { getFileTypeCategory } from '$lib/utils/file-type'; @@ -56,7 +57,7 @@ export async function parseFilesToMessageExtras( } extras.push({ - type: 'imageFile', + type: AttachmentType.IMAGE, name: file.name, base64Url }); @@ -67,7 +68,7 @@ export async function parseFilesToMessageExtras( const base64Data = await readFileAsBase64(file.file); extras.push({ - type: 'audioFile', + type: AttachmentType.AUDIO, name: file.name, base64Data: base64Data, mimeType: file.type @@ -117,7 +118,7 @@ export async function parseFilesToMessageExtras( ); extras.push({ - type: 'pdfFile', + type: AttachmentType.PDF, name: file.name, content: `PDF file with ${images.length} pages`, images: images, @@ -134,7 +135,7 @@ export async function parseFilesToMessageExtras( const content = await convertPDFToText(file.file); extras.push({ - type: 'pdfFile', + type: AttachmentType.PDF, name: file.name, content: content, processedAsImages: false, @@ -151,7 +152,7 @@ export async function parseFilesToMessageExtras( }); extras.push({ - type: 'pdfFile', + type: AttachmentType.PDF, name: file.name, content: content, processedAsImages: false, @@ -171,7 +172,7 @@ export 
async function parseFilesToMessageExtras( emptyFiles.push(file.name); } else if (isLikelyTextFile(content)) { extras.push({ - type: 'textFile', + type: AttachmentType.TEXT, name: file.name, content: content }); diff --git a/tools/server/webui/src/lib/utils/model-names.ts b/tools/server/webui/src/lib/utils/model-names.ts index b1ea9d95361e6..c0a1e1c578fb6 100644 --- a/tools/server/webui/src/lib/utils/model-names.ts +++ b/tools/server/webui/src/lib/utils/model-names.ts @@ -1,16 +1,19 @@ /** - * Normalizes a model name by extracting the filename from a path. + * Normalizes a model name by extracting the filename from a path, but preserves Hugging Face repository format. * * Handles both forward slashes (/) and backslashes (\) as path separators. - * If the model name is just a filename (no path), returns it as-is. + * - If the model name has exactly one slash (org/model format), preserves the full "org/model" name + * - If the model name has no slash or multiple slashes, extracts just the filename + * - If the model name is just a filename (no path), returns it as-is. 
* * @param modelName - The model name or path to normalize - * @returns The normalized model name (filename only) + * @returns The normalized model name * * @example - * normalizeModelName('models/llama-3.1-8b') // Returns: 'llama-3.1-8b' - * normalizeModelName('C:\\Models\\gpt-4') // Returns: 'gpt-4' - * normalizeModelName('simple-model') // Returns: 'simple-model' + * normalizeModelName('models/llama-3.1-8b') // Returns: 'models/llama-3.1-8b' (single slash -> preserved as org/model) + * normalizeModelName('C:\\Models\\gpt-4') // Returns: 'gpt-4' (multiple slashes -> filename) + * normalizeModelName('meta-llama/Llama-3.1-8B') // Returns: 'meta-llama/Llama-3.1-8B' (Hugging Face format) + * normalizeModelName('simple-model') // Returns: 'simple-model' (no slash) * normalizeModelName(' spaced ') // Returns: 'spaced' * normalizeModelName('') // Returns: '' */ @@ -22,6 +25,20 @@ export function normalizeModelName(modelName: string): string { } const segments = trimmed.split(/[\\/]/); + + // If we have exactly 2 segments (one slash), treat it as Hugging Face repo format + // and preserve the full "org/model" format + if (segments.length === 2) { + const [org, model] = segments; + const trimmedOrg = org?.trim(); + const trimmedModel = model?.trim(); + + if (trimmedOrg && trimmedModel) { + return `${trimmedOrg}/${trimmedModel}`; + } + } + + // For other cases (no slash, or multiple slashes), extract just the filename const candidate = segments.pop(); const normalized = candidate?.trim(); diff --git a/tools/server/webui/vite.config.ts b/tools/server/webui/vite.config.ts index 11ff665d8b2f8..f2df5dc287ca3 100644 --- a/tools/server/webui/vite.config.ts +++ b/tools/server/webui/vite.config.ts @@ -158,7 +158,8 @@ export default defineConfig({ proxy: { '/v1': 'http://localhost:8080', '/props': 'http://localhost:8080', - '/slots': 'http://localhost:8080' + '/slots': 'http://localhost:8080', + '/models': 'http://localhost:8080' }, headers: { 'Cross-Origin-Embedder-Policy': 'require-corp',