From c51124c710e1aa134d0ff10afad6511559d73022 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 11 Nov 2025 18:48:33 +0100 Subject: [PATCH 01/16] feat: initial hook to install elements directly Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/localai/backend.go | 4 +- core/http/endpoints/localai/gallery.go | 4 +- core/http/endpoints/localai/import_model.go | 43 +++++++++++++++++++++ core/http/routes/localai.go | 3 ++ core/http/routes/ui_api.go | 8 ++-- core/p2p/sync.go | 2 +- core/schema/localai.go | 6 +++ core/services/backends.go | 2 +- core/services/gallery.go | 8 ++-- core/services/models.go | 35 ++++++++++------- core/services/operation.go | 8 +++- 11 files changed, 92 insertions(+), 31 deletions(-) diff --git a/core/http/endpoints/localai/backend.go b/core/http/endpoints/localai/backend.go index f4d88ffec154..e2f5f5635a21 100644 --- a/core/http/endpoints/localai/backend.go +++ b/core/http/endpoints/localai/backend.go @@ -76,7 +76,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) err if err != nil { return err } - mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{ + mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{ ID: uuid.String(), GalleryElementName: input.ID, Galleries: mgs.galleries, @@ -95,7 +95,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() func(c *fiber.Ctx) er return func(c *fiber.Ctx) error { backendName := c.Params("name") - mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{ + mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{ Delete: true, GalleryElementName: backendName, Galleries: mgs.galleries, diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go index 8948fde34250..d8079bcc5128 100644 --- a/core/http/endpoints/localai/gallery.go +++ b/core/http/endpoints/localai/gallery.go @@ -77,7 +77,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe if err != nil { return err } - mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{ + mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ Req: input.GalleryModel, ID: uuid.String(), GalleryElementName: input.ID, @@ -98,7 +98,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib return func(c *fiber.Ctx) error { modelName := c.Params("name") - mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{ + mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ Delete: true, GalleryElementName: modelName, } diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index c6ff6bb962ea..e579642ed10e 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -2,16 +2,59 @@ package localai import ( "encoding/json" + "fmt" "os" "path/filepath" "strings" "github.com/gofiber/fiber/v2" + "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + httpUtils "github.com/mudler/LocalAI/core/http/utils" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/utils" + "gopkg.in/yaml.v3" ) +// ImportModelURIEndpoint handles creating new model configurations from a URI +func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler { + return func(c *fiber.Ctx) error { + + input := new(schema.ImportModelRequest) + + if err := c.BodyParser(input); err != nil { + return err + } + + // From the input, we are going to extract a default settings for the model, and use the gallery service + // TODO: This is now all fake data + modelConfig := gallery.ModelConfig{ + Name: "imported-model-name", + } + + uuid, err := uuid.NewUUID() + if err != nil { + return err + } + galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ + Req: gallery.GalleryModel{ + Overrides: map[string]interface{}{}, + }, + ID: uuid.String(), + GalleryElement: &modelConfig, + BackendGalleries: appConfig.BackendGalleries, + } + + return c.JSON(schema.GalleryResponse{ + ID: uuid.String(), + StatusURL: fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), uuid.String()), + }) + } +} + // ImportModelEndpoint handles creating new model configurations func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) fiber.Handler { return func(c *fiber.Ctx) error { diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 298f5fd787b3..119a02b55311 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -57,6 +57,9 @@ func RegisterLocalAIRoutes(router *fiber.App, // Custom model import endpoint router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig)) + // URI model import endpoint + router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService)) + // Custom model edit endpoint router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig)) diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index 1fb016825c2b..d60809512d38 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -247,7 +247,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig uid := id.String() opcache.Set(galleryID, uid) - op := services.GalleryOp[gallery.GalleryModel]{ + op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ ID: uid, GalleryElementName: galleryID, Galleries: appConfig.Galleries, @@ -290,7 +290,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig opcache.Set(galleryID, uid) - op := services.GalleryOp[gallery.GalleryModel]{ + op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ ID: uid, Delete: true, GalleryElementName: galleryName, @@ -525,7 +525,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig uid := id.String() opcache.Set(backendID, uid) - op := services.GalleryOp[gallery.GalleryBackend]{ + op := services.GalleryOp[gallery.GalleryBackend, any]{ ID: uid, GalleryElementName: backendID, Galleries: appConfig.BackendGalleries, @@ -567,7 +567,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig opcache.Set(backendID, uid) - op := services.GalleryOp[gallery.GalleryBackend]{ + op := services.GalleryOp[gallery.GalleryBackend, any]{ ID: uid, Delete: true, GalleryElementName: backendName, diff --git a/core/p2p/sync.go b/core/p2p/sync.go index f9be422a6bf5..44efe93d8224 100644 --- a/core/p2p/sync.go +++ b/core/p2p/sync.go @@ -73,7 +73,7 @@ func syncState(ctx context.Context, n *node.Node, app *application.Application) continue } - app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{ + app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ ID: uuid.String(), GalleryElementName: model, Galleries: app.ApplicationConfig().Galleries, diff --git a/core/schema/localai.go b/core/schema/localai.go index 0f9460cf4ae1..5eb56d91bf5d 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -1,6 +1,7 @@ package schema import ( + "encoding/json" "time" gopsutil "github.com/shirou/gopsutil/v3/process" @@ -157,3 +158,8 @@ type Detection struct { Height float32 `json:"height"` ClassName string `json:"class_name"` } + +type ImportModelRequest struct { + URI string `json:"uri"` + Preferences json.RawMessage `json:"preferences,omitempty"` +} diff --git a/core/services/backends.go b/core/services/backends.go index 7295734dfcbc..7ffca2ac1030 100644 --- a/core/services/backends.go +++ b/core/services/backends.go @@ -8,7 +8,7 @@ import ( "github.com/rs/zerolog/log" ) -func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error { +func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend, any], systemState *system.SystemState) error { utils.ResetDownloadTimers() g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0}) diff --git a/core/services/gallery.go b/core/services/gallery.go index 4833aa334ba2..2290c450d9ed 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -14,8 +14,8 @@ import ( type GalleryService struct { appConfig *config.ApplicationConfig sync.Mutex - ModelGalleryChannel chan GalleryOp[gallery.GalleryModel] - BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend] + ModelGalleryChannel chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig] + BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend, any] modelLoader *model.ModelLoader statuses map[string]*GalleryOpStatus @@ -24,8 +24,8 @@ type GalleryService struct { func NewGalleryService(appConfig *config.ApplicationConfig, ml *model.ModelLoader) *GalleryService { return &GalleryService{ appConfig: appConfig, - ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel]), - BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend]), + ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]), + BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend, any]), modelLoader: ml, statuses: make(map[string]*GalleryOpStatus), } diff --git a/core/services/models.go b/core/services/models.go index 43696f455a09..b22999f6b977 100644 --- a/core/services/models.go +++ b/core/services/models.go @@ -9,10 +9,11 @@ import ( "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/system" "github.com/mudler/LocalAI/pkg/utils" + "github.com/rs/zerolog/log" "gopkg.in/yaml.v2" ) -func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *config.ModelConfigLoader, systemState *system.SystemState) error { +func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig], cl *config.ModelConfigLoader, systemState *system.SystemState) error { utils.ResetDownloadTimers() g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0}) @@ -118,28 +119,32 @@ func ApplyGalleryFromString(systemState *system.SystemState, modelLoader *model. // processModelOperation handles the installation or deletion of a model func processModelOperation( - op *GalleryOp[gallery.GalleryModel], + op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig], systemState *system.SystemState, modelLoader *model.ModelLoader, enforcePredownloadScans bool, automaticallyInstallBackend bool, progressCallback func(string, string, string, float64), ) error { - // delete a model - if op.Delete { + switch { + case op.Delete: return gallery.DeleteModelFromSystem(systemState, op.GalleryElementName) - } - - // if the request contains a gallery name, we apply the gallery from the gallery list - if op.GalleryElementName != "" { + case op.GalleryElement != nil: + installedModel, err := gallery.InstallModel( + systemState, op.GalleryElement.Name, + op.GalleryElement, + op.Req.Overrides, + progressCallback, enforcePredownloadScans) + if automaticallyInstallBackend && installedModel.Backend != "" { + log.Debug().Msgf("Installing backend %q", installedModel.Backend) + if err := gallery.InstallBackendFromGallery(op.BackendGalleries, systemState, modelLoader, installedModel.Backend, progressCallback, false); err != nil { + return err + } + } + return err + case op.GalleryElementName != "": return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, systemState, modelLoader, op.GalleryElementName, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend) - // } else if op.ConfigURL != "" { - // err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL) - // if err != nil { - // return err - // } - // return cl.Preload(modelPath) - } else { + default: return installModelFromRemoteConfig(systemState, modelLoader, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries) } } diff --git a/core/services/operation.go b/core/services/operation.go index 962921807f9d..d0ba76ceb1a9 100644 --- a/core/services/operation.go +++ b/core/services/operation.go @@ -5,12 +5,16 @@ import ( "github.com/mudler/LocalAI/pkg/xsync" ) -type GalleryOp[T any] struct { +type GalleryOp[T any, E any] struct { ID string GalleryElementName string Delete bool - Req T + Req T + + // If specified, we install directly the gallery element + GalleryElement *E + Galleries []config.Gallery BackendGalleries []config.Gallery } From 1c5d78d3dadcee40e291179dfd53516c3cbf3772 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 11 Nov 2025 19:04:28 +0100 Subject: [PATCH 02/16] WIP: ui changes Signed-off-by: Ettore Di Giacinto --- core/http/views/model-editor.html | 669 +++++++++++++++-------- core/http/views/partials/inprogress.html | 9 + 2 files changed, 465 insertions(+), 213 deletions(-) diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html index d5cbc550b7c4..f8db7cf66fae 100644 --- a/core/http/views/model-editor.html +++ b/core/http/views/model-editor.html @@ -3,9 +3,10 @@ {{template "views/partials/head" .}} -
+
{{template "views/partials/navbar" .}} + {{template "views/partials/inprogress" .}}
@@ -24,19 +25,44 @@

{{if .ModelName}}Edit Model: {{.ModelName}}{{else}}Import New Model{{end}}

-

Configure your model settings using YAML

+

- - + + + + + +
@@ -45,8 +71,88 @@

- -
+ +
+
+ +
+

+
+ +
+ Import from URI +

+ + +
+ + +

+ Enter the URI or path to the model file you want to import +

+
+ + +
+
+ + +
+ +
+ +
+

+ Add key-value pairs to customize the import process +

+
+
+
+ + +
@@ -144,22 +250,22 @@

} /* Enhanced YAML Syntax Highlighting */ -.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for YAML keys */ -.cm-string { color: #10b981 !important; } /* Emerald for strings */ -.cm-number { color: #f59e0b !important; } /* Amber for numbers */ -.cm-comment { color: #6b7280 !important; font-style: italic !important; } /* Gray for comments */ -.cm-property { color: #ec4899 !important; } /* Pink for properties */ -.cm-operator { color: #ef4444 !important; } /* Red for operators */ -.cm-variable { color: #06b6d4 !important; } /* Cyan for variables */ -.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for tags */ -.cm-attribute { color: #f59e0b !important; } /* Amber for attributes */ -.cm-def { color: #ec4899 !important; font-weight: 600 !important; } /* Pink for definitions */ -.cm-bracket { color: #d1d5db !important; } /* Light gray for brackets */ -.cm-punctuation { color: #d1d5db !important; } /* Light gray for punctuation */ -.cm-quote { color: #10b981 !important; } /* Emerald for quotes */ -.cm-meta { color: #6b7280 !important; } /* Gray for meta */ -.cm-builtin { color: #f472b6 !important; } /* Pink for builtins */ -.cm-atom { color: #f59e0b !important; } /* Amber for atoms like true/false/null */ +.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; } +.cm-string { color: #10b981 !important; } +.cm-number { color: #f59e0b !important; } +.cm-comment { color: #6b7280 !important; font-style: italic !important; } +.cm-property { color: #ec4899 !important; } +.cm-operator { color: #ef4444 !important; } +.cm-variable { color: #06b6d4 !important; } +.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; } +.cm-attribute { color: #f59e0b !important; } +.cm-def { color: #ec4899 !important; font-weight: 600 !important; } +.cm-bracket { color: #d1d5db !important; } +.cm-punctuation { color: #d1d5db !important; } +.cm-quote { color: #10b981 !important; } +.cm-meta { color: #6b7280 !important; } +.cm-builtin { color: #f472b6 !important; } +.cm-atom { color: #f59e0b !important; } /* Enhanced scrollbar styling */ .CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler { @@ -242,22 +348,195 @@

diff --git a/core/http/views/partials/inprogress.html b/core/http/views/partials/inprogress.html index 318406cb0881..7ebe04927d1b 100644 --- a/core/http/views/partials/inprogress.html +++ b/core/http/views/partials/inprogress.html @@ -120,8 +120,17 @@

throw new Error('Failed to fetch operations'); } const data = await response.json(); + const previousCount = this.operations.length; this.operations = data.operations || []; + // If we had operations before and now we don't, refresh the page + if (previousCount > 0 && this.operations.length === 0) { + // Small delay to ensure the user sees the completion + setTimeout(() => { + window.location.reload(); + }, 1000); + } + // Auto-collapse if there are many operations if (this.operations.length > 5 && !this.collapsed) { // Don't auto-collapse, let user control it From 1ad42a08d7425e5d662afe820ae6cee3045763d5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 11 Nov 2025 21:28:23 +0100 Subject: [PATCH 03/16] Move HF api client to pkg Signed-off-by: Ettore Di Giacinto --- .github/gallery-agent/agent.go | 4 +- .github/gallery-agent/go.mod | 38 ---- .github/gallery-agent/go.sum | 168 ------------------ .github/gallery-agent/main.go | 2 +- .github/gallery-agent/tools.go | 8 +- .github/workflows/gallery-agent.yaml | 9 +- go.mod | 2 +- .../hfapi => pkg/huggingface-api}/client.go | 0 .../huggingface-api}/client_test.go | 2 +- .../huggingface-api}/hfapi_suite_test.go | 2 + 10 files changed, 12 insertions(+), 223 deletions(-) delete mode 100644 .github/gallery-agent/go.mod delete mode 100644 .github/gallery-agent/go.sum rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/client.go (100%) rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/client_test.go (99%) rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/hfapi_suite_test.go (99%) diff --git a/.github/gallery-agent/agent.go b/.github/gallery-agent/agent.go index 19262669f574..687b5a1eb77f 100644 --- a/.github/gallery-agent/agent.go +++ b/.github/gallery-agent/agent.go @@ -7,8 +7,8 @@ import ( "slices" "strings" - "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi" - "github.com/mudler/cogito" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" + cogito "github.com/mudler/cogito" "github.com/mudler/cogito/structures" "github.com/sashabaranov/go-openai/jsonschema" diff --git a/.github/gallery-agent/go.mod b/.github/gallery-agent/go.mod deleted file mode 100644 index 7129f5507bb4..000000000000 --- a/.github/gallery-agent/go.mod +++ /dev/null @@ -1,38 +0,0 @@ -module github.com/go-skynet/LocalAI/.github/gallery-agent - -go 1.24.1 - -require ( - github.com/mudler/cogito v0.3.0 - github.com/onsi/ginkgo/v2 v2.25.3 - github.com/onsi/gomega v1.38.2 - github.com/sashabaranov/go-openai v1.41.2 - github.com/tmc/langchaingo v0.1.13 -) - -require ( - dario.cat/mergo v1.0.1 // indirect - github.com/Masterminds/goutils v1.1.1 // indirect - github.com/Masterminds/semver/v3 v3.4.0 // indirect - github.com/Masterminds/sprig/v3 v3.3.0 // indirect - github.com/go-logr/logr v1.4.3 // indirect - github.com/go-task/slim-sprig/v3 v3.0.0 // indirect - github.com/google/go-cmp v0.7.0 // indirect - github.com/google/jsonschema-go v0.3.0 // indirect - github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/huandu/xstrings v1.5.0 // indirect - github.com/mitchellh/copystructure v1.2.0 // indirect - github.com/mitchellh/reflectwalk v1.0.2 // indirect - github.com/modelcontextprotocol/go-sdk v1.0.0 // indirect - github.com/shopspring/decimal v1.4.0 // indirect - github.com/spf13/cast v1.7.0 // indirect - github.com/yosida95/uritemplate/v3 v3.0.2 // indirect - go.uber.org/automaxprocs v1.6.0 // indirect - go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/crypto v0.41.0 // indirect - golang.org/x/net v0.43.0 // indirect - golang.org/x/sys v0.35.0 // indirect - golang.org/x/text v0.28.0 // indirect - golang.org/x/tools v0.36.0 // indirect -) diff --git a/.github/gallery-agent/go.sum b/.github/gallery-agent/go.sum deleted file mode 100644 index a5488f5b60a5..000000000000 --- a/.github/gallery-agent/go.sum +++ /dev/null @@ -1,168 +0,0 @@ -dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= -dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= -github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= -github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= -github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= -github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= -github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= -github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= -github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= -github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= -github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= -github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= -github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= -github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= -github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= -github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= -github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= -github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= -github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= -github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= -github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA= -github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= -github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= -github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= -github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= -github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= -github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= -github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw= -github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= -github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= -github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= -github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= -github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= -github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= -github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/jsonschema-go v0.3.0 h1:6AH2TxVNtk3IlvkkhjrtbUc4S8AvO0Xii0DxIygDg+Q= -github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= -github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= -github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= -github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= -github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= -github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= -github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= -github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= -github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= -github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= -github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= -github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= -github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= -github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/go-archive v0.1.0 h1:Kk/5rdW/g+H8NHdJW2gsXyZ7UnzvJNOy6VKJqueWdcQ= -github.com/moby/go-archive v0.1.0/go.mod h1:G9B+YoujNohJmrIYFBpSd54GTUB4lt9S+xVQvsJyFuo= -github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk= -github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= -github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= -github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= -github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= -github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= -github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= -github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= -github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= -github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= -github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74= -github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs= -github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= -github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= -github.com/mudler/cogito v0.3.0 h1:NbVAO3bLkK5oGSY0xq87jlz8C9OIsLW55s+8Hfzeu9s= -github.com/mudler/cogito v0.3.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU= -github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw= -github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE= -github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= -github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= -github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= -github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= -github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= -github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= -github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= -github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= -github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM= -github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg= -github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc= -github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c= -github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= -github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= -github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= -github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw= -github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w= -github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= -github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= -github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= -github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= -github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA= -github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg= -github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= -github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= -github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= -github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= -go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= -go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc= -go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= -go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= -go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= -go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= -go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= -go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= -go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= -go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= -go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= -go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= -golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4= -golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= -golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= -google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= -google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/.github/gallery-agent/main.go b/.github/gallery-agent/main.go index 78d3a6cd7612..40412f198d4f 100644 --- a/.github/gallery-agent/main.go +++ b/.github/gallery-agent/main.go @@ -9,7 +9,7 @@ import ( "strings" "time" - "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" ) // ProcessedModelFile represents a processed model file with additional metadata diff --git a/.github/gallery-agent/tools.go b/.github/gallery-agent/tools.go index b44c327764f9..3e2fc2f3a17c 100644 --- a/.github/gallery-agent/tools.go +++ b/.github/gallery-agent/tools.go @@ -3,9 +3,9 @@ package main import ( "fmt" - "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi" - "github.com/sashabaranov/go-openai" - "github.com/tmc/langchaingo/jsonschema" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" + openai "github.com/sashabaranov/go-openai" + jsonschema "github.com/sashabaranov/go-openai/jsonschema" ) // Get repository README from HF @@ -13,7 +13,7 @@ type HFReadmeTool struct { client *hfapi.Client } -func (s *HFReadmeTool) Run(args map[string]any) (string, error) { +func (s *HFReadmeTool) Execute(args map[string]any) (string, error) { q, ok := args["repository"].(string) if !ok { return "", fmt.Errorf("no query") diff --git a/.github/workflows/gallery-agent.yaml b/.github/workflows/gallery-agent.yaml index e796d2f2cba3..4c4074bbf215 100644 --- a/.github/workflows/gallery-agent.yaml +++ b/.github/workflows/gallery-agent.yaml @@ -39,11 +39,6 @@ jobs: with: go-version: '1.21' - - name: Build gallery agent - run: | - cd .github/gallery-agent - go mod download - go build -o gallery-agent . - name: Run gallery agent env: @@ -56,9 +51,7 @@ jobs: MAX_MODELS: ${{ github.event.inputs.max_models || '1' }} run: | export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml - cd .github/gallery-agent - ./gallery-agent - rm -rf gallery-agent + go run .github/gallery-agent - name: Check for changes id: check_changes diff --git a/go.mod b/go.mod index f520ff4edfed..464ec1553086 100644 --- a/go.mod +++ b/go.mod @@ -59,6 +59,7 @@ require ( go.opentelemetry.io/otel/metric v1.38.0 go.opentelemetry.io/otel/sdk/metric v1.38.0 google.golang.org/grpc v1.76.0 + google.golang.org/protobuf v1.36.8 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 oras.land/oras-go/v2 v2.6.0 @@ -148,7 +149,6 @@ require ( golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 // indirect golang.org/x/time v0.12.0 // indirect - google.golang.org/protobuf v1.36.8 // indirect ) require ( diff --git a/.github/gallery-agent/hfapi/client.go b/pkg/huggingface-api/client.go similarity index 100% rename from .github/gallery-agent/hfapi/client.go rename to pkg/huggingface-api/client.go diff --git a/.github/gallery-agent/hfapi/client_test.go b/pkg/huggingface-api/client_test.go similarity index 99% rename from .github/gallery-agent/hfapi/client_test.go rename to pkg/huggingface-api/client_test.go index c490d0e9deb3..37a381416125 100644 --- a/.github/gallery-agent/hfapi/client_test.go +++ b/pkg/huggingface-api/client_test.go @@ -8,7 +8,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" ) var _ = Describe("HuggingFace API Client", func() { diff --git a/.github/gallery-agent/hfapi/hfapi_suite_test.go b/pkg/huggingface-api/hfapi_suite_test.go similarity index 99% rename from .github/gallery-agent/hfapi/hfapi_suite_test.go rename to pkg/huggingface-api/hfapi_suite_test.go index 6dca0c029cbe..ec4654c5a9a7 100644 --- a/.github/gallery-agent/hfapi/hfapi_suite_test.go +++ b/pkg/huggingface-api/hfapi_suite_test.go @@ -11,3 +11,5 @@ func TestHfapi(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "HuggingFace API Suite") } + + From cbf567a92ba7bdb92e0f0a52c2e2b40ac525e25f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 11 Nov 2025 21:54:21 +0100 Subject: [PATCH 04/16] Add simple importer for gguf files Signed-off-by: Ettore Di Giacinto --- core/gallery/importers/llama-cpp.go | 92 +++++++++++++++++++++ core/http/endpoints/localai/import_model.go | 25 +++++- 2 files changed, 113 insertions(+), 4 deletions(-) create mode 100644 core/gallery/importers/llama-cpp.go diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go new file mode 100644 index 000000000000..e91afe9972d8 --- /dev/null +++ b/core/gallery/importers/llama-cpp.go @@ -0,0 +1,92 @@ +package importers + +import ( + "encoding/json" + "path/filepath" + "strings" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/pkg/functions" + "go.yaml.in/yaml/v2" +) + +type LlamaCPPImporter struct{} + +func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool { + preferences, err := request.Preferences.MarshalJSON() + if err != nil { + return false + } + preferencesMap := make(map[string]any) + err = json.Unmarshal(preferences, &preferencesMap) + if err != nil { + return false + } + + if preferencesMap["backend"] == "llama-cpp" { + return true + } + + return strings.HasSuffix(uri, ".gguf") +} + +func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) { + preferences, err := request.Preferences.MarshalJSON() + if err != nil { + return gallery.ModelConfig{}, err + } + preferencesMap := make(map[string]any) + err = json.Unmarshal(preferences, &preferencesMap) + if err != nil { + return gallery.ModelConfig{}, err + } + + name, ok := preferencesMap["name"].(string) + if !ok { + name = filepath.Base(uri) + } + + description, ok := preferencesMap["description"].(string) + if !ok { + description = "Imported from " + uri + } + + modelConfig := config.ModelConfig{ + Name: name, + Description: description, + KnownUsecaseStrings: []string{"chat"}, + Backend: "llama-cpp", + PredictionOptions: schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{ + Model: filepath.Base(uri), + }, + }, + TemplateConfig: config.TemplateConfig{ + UseTokenizerTemplate: true, + }, + FunctionsConfig: functions.FunctionsConfig{ + GrammarConfig: functions.GrammarConfig{ + NoGrammar: true, + }, + }, + } + + data, err := yaml.Marshal(modelConfig) + if err != nil { + return gallery.ModelConfig{}, err + } + + return gallery.ModelConfig{ + Name: name, + Description: description, + ConfigFile: string(data), + Files: []gallery.File{ + { + URI: uri, + Filename: filepath.Base(uri), + }, + }, + }, nil +} diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index e579642ed10e..eb6dbc80322c 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -11,6 +11,7 @@ import ( "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/gallery/importers" httpUtils "github.com/mudler/LocalAI/core/http/utils" "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" @@ -19,6 +20,15 @@ import ( "gopkg.in/yaml.v3" ) +type Importer interface { + Match(uri string, request schema.ImportModelRequest) bool + Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) +} + +var defaultImporters = []Importer{ + &importers.LlamaCPPImporter{}, +} + // ImportModelURIEndpoint handles creating new model configurations from a URI func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler { return func(c *fiber.Ctx) error { @@ -29,10 +39,17 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl return err } - // From the input, we are going to extract a default settings for the model, and use the gallery service - // TODO: This is now all fake data - modelConfig := gallery.ModelConfig{ - Name: "imported-model-name", + var err error + var modelConfig gallery.ModelConfig + + for _, importer := range defaultImporters { + if importer.Match(input.URI, *input) { + modelConfig, err = importer.Import(input.URI, *input) + if err != nil { + continue + } + break + } } uuid, err := uuid.NewUUID() From 0a9d6fa75d860abc59182977f9033d1f27e361fc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 11 Nov 2025 21:58:59 +0100 Subject: [PATCH 05/16] Add opcache Signed-off-by: Ettore Di Giacinto --- core/http/app.go | 11 ++++++++--- core/http/endpoints/localai/import_model.go | 21 +++++++++++++++++---- core/http/routes/localai.go | 5 +++-- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/core/http/app.go b/core/http/app.go index dcd9a2219958..916ea9600b81 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -200,11 +200,16 @@ func API(application *application.Application) (*fiber.App, error) { requestExtractor := middleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) - routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService()) + + // Create opcache for tracking UI operations (used by both UI and LocalAI routes) + var opcache *services.OpCache + if !application.ApplicationConfig().DisableWebUI { + opcache = services.NewOpCache(application.GalleryService()) + } + + routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache) routes.RegisterOpenAIRoutes(router, requestExtractor, application) if !application.ApplicationConfig().DisableWebUI { - // Create opcache for tracking UI operations - opcache := services.NewOpCache(application.GalleryService()) routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache) routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService()) } diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index eb6dbc80322c..73e47e9b549d 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -30,7 +30,7 @@ var defaultImporters = []Importer{ } // ImportModelURIEndpoint handles creating new model configurations from a URI -func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler { +func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler { return func(c *fiber.Ctx) error { input := new(schema.ImportModelRequest) @@ -56,13 +56,26 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl if err != nil { return err } + + // Determine gallery ID for tracking - use model name if available, otherwise use URI + galleryID := input.URI + if modelConfig.Name != "" { + galleryID = modelConfig.Name + } + + // Register operation in opcache if available (for UI progress tracking) + if opcache != nil { + opcache.Set(galleryID, uuid.String()) + } + galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ Req: gallery.GalleryModel{ Overrides: map[string]interface{}{}, }, - ID: uuid.String(), - GalleryElement: &modelConfig, - BackendGalleries: appConfig.BackendGalleries, + ID: uuid.String(), + GalleryElementName: galleryID, + GalleryElement: &modelConfig, + BackendGalleries: appConfig.BackendGalleries, } return c.JSON(schema.GalleryResponse{ diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index 119a02b55311..5d9ae821007d 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -18,7 +18,8 @@ func RegisterLocalAIRoutes(router *fiber.App, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, - galleryService *services.GalleryService) { + galleryService *services.GalleryService, + opcache *services.OpCache) { router.Get("/swagger/*", swagger.HandlerDefault) // default @@ -58,7 +59,7 @@ func RegisterLocalAIRoutes(router *fiber.App, router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig)) // URI model import endpoint - router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService)) + router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache)) // Custom model edit endpoint router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig)) From 9c0fda137442aedcb065772860c2129a68086507 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 11 Nov 2025 22:22:52 +0100 Subject: [PATCH 06/16] wire importers to CLI Signed-off-by: Ettore Di Giacinto --- core/application/startup.go | 12 +++-- core/cli/models.go | 10 +++- core/gallery/importers/importers.go | 15 ++++++ core/gallery/importers/llama-cpp.go | 2 + core/http/endpoints/localai/import_model.go | 11 +--- core/startup/model_preload.go | 57 ++++++++++++++++++++- core/startup/model_preload_test.go | 4 +- 7 files changed, 91 insertions(+), 20 deletions(-) create mode 100644 core/gallery/importers/importers.go diff --git a/core/application/startup.go b/core/application/startup.go index a497e0e6057e..8e2387b9226f 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -22,9 +22,15 @@ func New(opts ...config.AppOption) (*Application, error) { log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) + + if err := application.start(); err != nil { + return nil, err + } + caps, err := xsysinfo.CPUCapabilities() if err == nil { log.Debug().Msgf("CPU capabilities: %v", caps) + } gpus, err := xsysinfo.GPUs() if err == nil { @@ -56,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) { } } - if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil { + if err := coreStartup.InstallModels(application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil { log.Error().Err(err).Msg("error installing models") } @@ -152,10 +158,6 @@ func New(opts ...config.AppOption) (*Application, error) { // Watch the configuration directory startWatcher(options) - if err := application.start(); err != nil { - return nil, err - } - log.Info().Msg("core/startup process completed!") return application, nil } diff --git a/core/cli/models.go b/core/cli/models.go index 2b8da49c466a..dd5273317bfc 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -1,12 +1,14 @@ package cli import ( + "context" "encoding/json" "errors" "fmt" cliContext "github.com/mudler/LocalAI/core/cli/context" "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/startup" @@ -78,6 +80,12 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { return err } + galleryService := services.NewGalleryService(&config.ApplicationConfig{}, model.NewModelLoader(systemState, true)) + err = galleryService.Start(context.Background(), config.NewModelConfigLoader(mi.ModelsPath), systemState) + if err != nil { + return err + } + var galleries []config.Gallery if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil { log.Error().Err(err).Msg("unable to load galleries") @@ -127,7 +135,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { } modelLoader := model.NewModelLoader(systemState, true) - err = startup.InstallModels(galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName) + err = startup.InstallModels(galleryService, galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName) if err != nil { return err } diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go new file mode 100644 index 000000000000..5236af0cc245 --- /dev/null +++ b/core/gallery/importers/importers.go @@ -0,0 +1,15 @@ +package importers + +import ( + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/schema" +) + +var DefaultImporters = []Importer{ + &LlamaCPPImporter{}, +} + +type Importer interface { + Match(uri string, request schema.ImportModelRequest) bool + Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) +} diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go index e91afe9972d8..3596c868f50a 100644 --- a/core/gallery/importers/llama-cpp.go +++ b/core/gallery/importers/llama-cpp.go @@ -12,6 +12,8 @@ import ( "go.yaml.in/yaml/v2" ) +var _ Importer = &LlamaCPPImporter{} + type LlamaCPPImporter struct{} func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool { diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index 73e47e9b549d..ea71ae53ae5d 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -20,15 +20,6 @@ import ( "gopkg.in/yaml.v3" ) -type Importer interface { - Match(uri string, request schema.ImportModelRequest) bool - Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) -} - -var defaultImporters = []Importer{ - &importers.LlamaCPPImporter{}, -} - // ImportModelURIEndpoint handles creating new model configurations from a URI func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler { return func(c *fiber.Ctx) error { @@ -42,7 +33,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl var err error var modelConfig gallery.ModelConfig - for _, importer := range defaultImporters { + for _, importer := range importers.DefaultImporters { if importer.Match(input.URI, *input) { modelConfig, err = importer.Import(input.URI, *input) if err != nil { diff --git a/core/startup/model_preload.go b/core/startup/model_preload.go index a98e0592bd80..4550a25e46f3 100644 --- a/core/startup/model_preload.go +++ b/core/startup/model_preload.go @@ -7,9 +7,14 @@ import ( "path" "path/filepath" "strings" + "time" + "github.com/google/uuid" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/gallery/importers" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/model" "github.com/mudler/LocalAI/pkg/system" @@ -25,7 +30,7 @@ const ( // InstallModels will preload models from the given list of URLs and galleries // It will download the model if it is not already present in the model path // It will also try to resolve if the model is an embedded model YAML configuration -func InstallModels(galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error { +func InstallModels(galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error { // create an error that groups all errors var err error @@ -154,7 +159,55 @@ func InstallModels(galleries, backendGalleries []config.Gallery, systemState *sy err = errors.Join(err, e) } else if !found { log.Warn().Msgf("[startup] failed resolving model '%s'", url) - err = errors.Join(err, fmt.Errorf("failed resolving model '%s'", url)) + + if galleryService == nil { + err = errors.Join(err, fmt.Errorf("cannot start autoimporter, not sure how to handle this uri")) + continue + } + + // TODO: start autoimporter + var err error + var modelConfig gallery.ModelConfig + for _, importer := range importers.DefaultImporters { + if importer.Match(url, schema.ImportModelRequest{}) { + modelConfig, err = importer.Import(url, schema.ImportModelRequest{}) + if err != nil { + continue + } + break + } + } + + uuid, err := uuid.NewUUID() + if err != nil { + return err + } + + galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ + Req: gallery.GalleryModel{ + Overrides: map[string]interface{}{}, + }, + ID: uuid.String(), + GalleryElementName: modelConfig.Name, + GalleryElement: &modelConfig, + BackendGalleries: backendGalleries, + } + + var status *services.GalleryOpStatus + // wait for op to finish + for { + status = galleryService.GetStatus(uuid.String()) + if status != nil && status.Processed { + break + } + time.Sleep(1 * time.Second) + } + + if status.Error != nil { + return status.Error + } + + log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url) } } } diff --git a/core/startup/model_preload_test.go b/core/startup/model_preload_test.go index 16096f41befa..255c3dc4e65a 100644 --- a/core/startup/model_preload_test.go +++ b/core/startup/model_preload_test.go @@ -33,7 +33,7 @@ var _ = Describe("Preload test", func() { url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml" fileName := fmt.Sprintf("%s.yaml", "phi-2") - InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url) + InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url) resultFile := filepath.Join(tmpdir, fileName) @@ -46,7 +46,7 @@ var _ = Describe("Preload test", func() { url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K") - err := InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url) + err := InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url) Expect(err).ToNot(HaveOccurred()) resultFile := filepath.Join(tmpdir, fileName) From af6070ac602235dc371d8ca95a216623fa8725cf Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 11 Nov 2025 22:30:39 +0100 Subject: [PATCH 07/16] Add omitempty to config fields Signed-off-by: Ettore Di Giacinto --- core/config/model_config.go | 292 ++++++++++++++++++------------------ core/schema/prediction.go | 49 +++--- core/schema/request.go | 3 +- pkg/functions/parse.go | 59 ++++---- 4 files changed, 208 insertions(+), 195 deletions(-) diff --git a/core/config/model_config.go b/core/config/model_config.go index 1dee82363d2f..41664f2a3dd4 100644 --- a/core/config/model_config.go +++ b/core/config/model_config.go @@ -16,30 +16,31 @@ const ( RAND_SEED = -1 ) +// @Description TTS configuration type TTSConfig struct { // Voice wav path or id - Voice string `yaml:"voice" json:"voice"` + Voice string `yaml:"voice,omitempty" json:"voice,omitempty"` - AudioPath string `yaml:"audio_path" json:"audio_path"` + AudioPath string `yaml:"audio_path,omitempty" json:"audio_path,omitempty"` } -// ModelConfig represents a model configuration +// @Description ModelConfig represents a model configuration type ModelConfig struct { modelConfigFile string `yaml:"-" json:"-"` - schema.PredictionOptions `yaml:"parameters" json:"parameters"` - Name string `yaml:"name" json:"name"` - - F16 *bool `yaml:"f16" json:"f16"` - Threads *int `yaml:"threads" json:"threads"` - Debug *bool `yaml:"debug" json:"debug"` - Roles map[string]string `yaml:"roles" json:"roles"` - Embeddings *bool `yaml:"embeddings" json:"embeddings"` - Backend string `yaml:"backend" json:"backend"` - TemplateConfig TemplateConfig `yaml:"template" json:"template"` - KnownUsecaseStrings []string `yaml:"known_usecases" json:"known_usecases"` + schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"` + Name string `yaml:"name,omitempty" json:"name,omitempty"` + + F16 *bool `yaml:"f16,omitempty" json:"f16,omitempty"` + Threads *int `yaml:"threads,omitempty" json:"threads,omitempty"` + Debug *bool `yaml:"debug,omitempty" json:"debug,omitempty"` + Roles map[string]string `yaml:"roles,omitempty" json:"roles,omitempty"` + Embeddings *bool `yaml:"embeddings,omitempty" json:"embeddings,omitempty"` + Backend string `yaml:"backend,omitempty" json:"backend,omitempty"` + TemplateConfig TemplateConfig `yaml:"template,omitempty" json:"template,omitempty"` + KnownUsecaseStrings []string `yaml:"known_usecases,omitempty" json:"known_usecases,omitempty"` KnownUsecases *ModelConfigUsecases `yaml:"-" json:"-"` - Pipeline Pipeline `yaml:"pipeline" json:"pipeline"` + Pipeline Pipeline `yaml:"pipeline,omitempty" json:"pipeline,omitempty"` PromptStrings, InputStrings []string `yaml:"-" json:"-"` InputToken [][]int `yaml:"-" json:"-"` @@ -47,50 +48,52 @@ type ModelConfig struct { ResponseFormat string `yaml:"-" json:"-"` ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"` - FunctionsConfig functions.FunctionsConfig `yaml:"function" json:"function"` + FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"` - FeatureFlag FeatureFlag `yaml:"feature_flags" json:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. + FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early. // LLM configs (GPT4ALL, Llama.cpp, ...) LLMConfig `yaml:",inline" json:",inline"` // Diffusers - Diffusers Diffusers `yaml:"diffusers" json:"diffusers"` - Step int `yaml:"step" json:"step"` + Diffusers Diffusers `yaml:"diffusers,omitempty" json:"diffusers,omitempty"` + Step int `yaml:"step,omitempty" json:"step,omitempty"` // GRPC Options - GRPC GRPC `yaml:"grpc" json:"grpc"` + GRPC GRPC `yaml:"grpc,omitempty" json:"grpc,omitempty"` // TTS specifics - TTSConfig `yaml:"tts" json:"tts"` + TTSConfig `yaml:"tts,omitempty" json:"tts,omitempty"` // CUDA // Explicitly enable CUDA or not (some backends might need it) - CUDA bool `yaml:"cuda" json:"cuda"` + CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"` - DownloadFiles []File `yaml:"download_files" json:"download_files"` + DownloadFiles []File `yaml:"download_files,omitempty" json:"download_files,omitempty"` - Description string `yaml:"description" json:"description"` - Usage string `yaml:"usage" json:"usage"` + Description string `yaml:"description,omitempty" json:"description,omitempty"` + Usage string `yaml:"usage,omitempty" json:"usage,omitempty"` - Options []string `yaml:"options" json:"options"` - Overrides []string `yaml:"overrides" json:"overrides"` + Options []string `yaml:"options,omitempty" json:"options,omitempty"` + Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"` - MCP MCPConfig `yaml:"mcp" json:"mcp"` - Agent AgentConfig `yaml:"agent" json:"agent"` + MCP MCPConfig `yaml:"mcp,omitempty" json:"mcp,omitempty"` + Agent AgentConfig `yaml:"agent,omitempty" json:"agent,omitempty"` } +// @Description MCP configuration type MCPConfig struct { - Servers string `yaml:"remote" json:"remote"` - Stdio string `yaml:"stdio" json:"stdio"` + Servers string `yaml:"remote,omitempty" json:"remote,omitempty"` + Stdio string `yaml:"stdio,omitempty" json:"stdio,omitempty"` } +// @Description Agent configuration type AgentConfig struct { - MaxAttempts int `yaml:"max_attempts" json:"max_attempts"` - MaxIterations int `yaml:"max_iterations" json:"max_iterations"` - EnableReasoning bool `yaml:"enable_reasoning" json:"enable_reasoning"` - EnablePlanning bool `yaml:"enable_planning" json:"enable_planning"` - EnableMCPPrompts bool `yaml:"enable_mcp_prompts" json:"enable_mcp_prompts"` - EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator" json:"enable_plan_re_evaluator"` + MaxAttempts int `yaml:"max_attempts,omitempty" json:"max_attempts,omitempty"` + MaxIterations int `yaml:"max_iterations,omitempty" json:"max_iterations,omitempty"` + EnableReasoning bool `yaml:"enable_reasoning,omitempty" json:"enable_reasoning,omitempty"` + EnablePlanning bool `yaml:"enable_planning,omitempty" json:"enable_planning,omitempty"` + EnableMCPPrompts bool `yaml:"enable_mcp_prompts,omitempty" json:"enable_mcp_prompts,omitempty"` + EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator,omitempty" json:"enable_plan_re_evaluator,omitempty"` } func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers], error) { @@ -107,35 +110,39 @@ func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCP return remote, stdio, nil } +// @Description MCP generic configuration type MCPGenericConfig[T any] struct { - Servers T `yaml:"mcpServers" json:"mcpServers"` + Servers T `yaml:"mcpServers,omitempty" json:"mcpServers,omitempty"` } type MCPRemoteServers map[string]MCPRemoteServer type MCPSTDIOServers map[string]MCPSTDIOServer +// @Description MCP remote server configuration type MCPRemoteServer struct { - URL string `json:"url"` - Token string `json:"token"` + URL string `json:"url,omitempty"` + Token string `json:"token,omitempty"` } +// @Description MCP STDIO server configuration type MCPSTDIOServer struct { - Args []string `json:"args"` - Env map[string]string `json:"env"` - Command string `json:"command"` + Args []string `json:"args,omitempty"` + Env map[string]string `json:"env,omitempty"` + Command string `json:"command,omitempty"` } -// Pipeline defines other models to use for audio-to-audio +// @Description Pipeline defines other models to use for audio-to-audio type Pipeline struct { - TTS string `yaml:"tts" json:"tts"` - LLM string `yaml:"llm" json:"llm"` - Transcription string `yaml:"transcription" json:"transcription"` - VAD string `yaml:"vad" json:"vad"` + TTS string `yaml:"tts,omitempty" json:"tts,omitempty"` + LLM string `yaml:"llm,omitempty" json:"llm,omitempty"` + Transcription string `yaml:"transcription,omitempty" json:"transcription,omitempty"` + VAD string `yaml:"vad,omitempty" json:"vad,omitempty"` } +// @Description File configuration for model downloads type File struct { - Filename string `yaml:"filename" json:"filename"` - SHA256 string `yaml:"sha256" json:"sha256"` - URI downloader.URI `yaml:"uri" json:"uri"` + Filename string `yaml:"filename,omitempty" json:"filename,omitempty"` + SHA256 string `yaml:"sha256,omitempty" json:"sha256,omitempty"` + URI downloader.URI `yaml:"uri,omitempty" json:"uri,omitempty"` } type FeatureFlag map[string]*bool @@ -147,124 +154,125 @@ func (ff FeatureFlag) Enabled(s string) bool { return false } +// @Description GRPC configuration type GRPC struct { - Attempts int `yaml:"attempts" json:"attempts"` - AttemptsSleepTime int `yaml:"attempts_sleep_time" json:"attempts_sleep_time"` + Attempts int `yaml:"attempts,omitempty" json:"attempts,omitempty"` + AttemptsSleepTime int `yaml:"attempts_sleep_time,omitempty" json:"attempts_sleep_time,omitempty"` } +// @Description Diffusers configuration type Diffusers struct { - CUDA bool `yaml:"cuda" json:"cuda"` - PipelineType string `yaml:"pipeline_type" json:"pipeline_type"` - SchedulerType string `yaml:"scheduler_type" json:"scheduler_type"` - EnableParameters string `yaml:"enable_parameters" json:"enable_parameters"` // A list of comma separated parameters to specify - IMG2IMG bool `yaml:"img2img" json:"img2img"` // Image to Image Diffuser - ClipSkip int `yaml:"clip_skip" json:"clip_skip"` // Skip every N frames - ClipModel string `yaml:"clip_model" json:"clip_model"` // Clip model to use - ClipSubFolder string `yaml:"clip_subfolder" json:"clip_subfolder"` // Subfolder to use for clip model - ControlNet string `yaml:"control_net" json:"control_net"` -} - -// LLMConfig is a struct that holds the configuration that are -// generic for most of the LLM backends. + CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"` + PipelineType string `yaml:"pipeline_type,omitempty" json:"pipeline_type,omitempty"` + SchedulerType string `yaml:"scheduler_type,omitempty" json:"scheduler_type,omitempty"` + EnableParameters string `yaml:"enable_parameters,omitempty" json:"enable_parameters,omitempty"` // A list of comma separated parameters to specify + IMG2IMG bool `yaml:"img2img,omitempty" json:"img2img,omitempty"` // Image to Image Diffuser + ClipSkip int `yaml:"clip_skip,omitempty" json:"clip_skip,omitempty"` // Skip every N frames + ClipModel string `yaml:"clip_model,omitempty" json:"clip_model,omitempty"` // Clip model to use + ClipSubFolder string `yaml:"clip_subfolder,omitempty" json:"clip_subfolder,omitempty"` // Subfolder to use for clip model + ControlNet string `yaml:"control_net,omitempty" json:"control_net,omitempty"` +} + +// @Description LLMConfig is a struct that holds the configuration that are generic for most of the LLM backends. type LLMConfig struct { - SystemPrompt string `yaml:"system_prompt" json:"system_prompt"` - TensorSplit string `yaml:"tensor_split" json:"tensor_split"` - MainGPU string `yaml:"main_gpu" json:"main_gpu"` - RMSNormEps float32 `yaml:"rms_norm_eps" json:"rms_norm_eps"` - NGQA int32 `yaml:"ngqa" json:"ngqa"` - PromptCachePath string `yaml:"prompt_cache_path" json:"prompt_cache_path"` - PromptCacheAll bool `yaml:"prompt_cache_all" json:"prompt_cache_all"` - PromptCacheRO bool `yaml:"prompt_cache_ro" json:"prompt_cache_ro"` - MirostatETA *float64 `yaml:"mirostat_eta" json:"mirostat_eta"` - MirostatTAU *float64 `yaml:"mirostat_tau" json:"mirostat_tau"` - Mirostat *int `yaml:"mirostat" json:"mirostat"` - NGPULayers *int `yaml:"gpu_layers" json:"gpu_layers"` - MMap *bool `yaml:"mmap" json:"mmap"` - MMlock *bool `yaml:"mmlock" json:"mmlock"` - LowVRAM *bool `yaml:"low_vram" json:"low_vram"` - Reranking *bool `yaml:"reranking" json:"reranking"` - Grammar string `yaml:"grammar" json:"grammar"` - StopWords []string `yaml:"stopwords" json:"stopwords"` - Cutstrings []string `yaml:"cutstrings" json:"cutstrings"` - ExtractRegex []string `yaml:"extract_regex" json:"extract_regex"` - TrimSpace []string `yaml:"trimspace" json:"trimspace"` - TrimSuffix []string `yaml:"trimsuffix" json:"trimsuffix"` - - ContextSize *int `yaml:"context_size" json:"context_size"` - NUMA bool `yaml:"numa" json:"numa"` - LoraAdapter string `yaml:"lora_adapter" json:"lora_adapter"` - LoraBase string `yaml:"lora_base" json:"lora_base"` - LoraAdapters []string `yaml:"lora_adapters" json:"lora_adapters"` - LoraScales []float32 `yaml:"lora_scales" json:"lora_scales"` - LoraScale float32 `yaml:"lora_scale" json:"lora_scale"` - NoMulMatQ bool `yaml:"no_mulmatq" json:"no_mulmatq"` - DraftModel string `yaml:"draft_model" json:"draft_model"` - NDraft int32 `yaml:"n_draft" json:"n_draft"` - Quantization string `yaml:"quantization" json:"quantization"` - LoadFormat string `yaml:"load_format" json:"load_format"` - GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization" json:"gpu_memory_utilization"` // vLLM - TrustRemoteCode bool `yaml:"trust_remote_code" json:"trust_remote_code"` // vLLM - EnforceEager bool `yaml:"enforce_eager" json:"enforce_eager"` // vLLM - SwapSpace int `yaml:"swap_space" json:"swap_space"` // vLLM - MaxModelLen int `yaml:"max_model_len" json:"max_model_len"` // vLLM - TensorParallelSize int `yaml:"tensor_parallel_size" json:"tensor_parallel_size"` // vLLM - DisableLogStatus bool `yaml:"disable_log_stats" json:"disable_log_stats"` // vLLM - DType string `yaml:"dtype" json:"dtype"` // vLLM - LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt" json:"limit_mm_per_prompt"` // vLLM - MMProj string `yaml:"mmproj" json:"mmproj"` - - FlashAttention *string `yaml:"flash_attention" json:"flash_attention"` - NoKVOffloading bool `yaml:"no_kv_offloading" json:"no_kv_offloading"` - CacheTypeK string `yaml:"cache_type_k" json:"cache_type_k"` - CacheTypeV string `yaml:"cache_type_v" json:"cache_type_v"` - - RopeScaling string `yaml:"rope_scaling" json:"rope_scaling"` - ModelType string `yaml:"type" json:"type"` - - YarnExtFactor float32 `yaml:"yarn_ext_factor" json:"yarn_ext_factor"` - YarnAttnFactor float32 `yaml:"yarn_attn_factor" json:"yarn_attn_factor"` - YarnBetaFast float32 `yaml:"yarn_beta_fast" json:"yarn_beta_fast"` - YarnBetaSlow float32 `yaml:"yarn_beta_slow" json:"yarn_beta_slow"` - - CFGScale float32 `yaml:"cfg_scale" json:"cfg_scale"` // Classifier-Free Guidance Scale -} - -// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM + SystemPrompt string `yaml:"system_prompt,omitempty" json:"system_prompt,omitempty"` + TensorSplit string `yaml:"tensor_split,omitempty" json:"tensor_split,omitempty"` + MainGPU string `yaml:"main_gpu,omitempty" json:"main_gpu,omitempty"` + RMSNormEps float32 `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"` + NGQA int32 `yaml:"ngqa,omitempty" json:"ngqa,omitempty"` + PromptCachePath string `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"` + PromptCacheAll bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"` + PromptCacheRO bool `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"` + MirostatETA *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"` + MirostatTAU *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"` + Mirostat *int `yaml:"mirostat,omitempty" json:"mirostat,omitempty"` + NGPULayers *int `yaml:"gpu_layers,omitempty" json:"gpu_layers,omitempty"` + MMap *bool `yaml:"mmap,omitempty" json:"mmap,omitempty"` + MMlock *bool `yaml:"mmlock,omitempty" json:"mmlock,omitempty"` + LowVRAM *bool `yaml:"low_vram,omitempty" json:"low_vram,omitempty"` + Reranking *bool `yaml:"reranking,omitempty" json:"reranking,omitempty"` + Grammar string `yaml:"grammar,omitempty" json:"grammar,omitempty"` + StopWords []string `yaml:"stopwords,omitempty" json:"stopwords,omitempty"` + Cutstrings []string `yaml:"cutstrings,omitempty" json:"cutstrings,omitempty"` + ExtractRegex []string `yaml:"extract_regex,omitempty" json:"extract_regex,omitempty"` + TrimSpace []string `yaml:"trimspace,omitempty" json:"trimspace,omitempty"` + TrimSuffix []string `yaml:"trimsuffix,omitempty" json:"trimsuffix,omitempty"` + + ContextSize *int `yaml:"context_size,omitempty" json:"context_size,omitempty"` + NUMA bool `yaml:"numa,omitempty" json:"numa,omitempty"` + LoraAdapter string `yaml:"lora_adapter,omitempty" json:"lora_adapter,omitempty"` + LoraBase string `yaml:"lora_base,omitempty" json:"lora_base,omitempty"` + LoraAdapters []string `yaml:"lora_adapters,omitempty" json:"lora_adapters,omitempty"` + LoraScales []float32 `yaml:"lora_scales,omitempty" json:"lora_scales,omitempty"` + LoraScale float32 `yaml:"lora_scale,omitempty" json:"lora_scale,omitempty"` + NoMulMatQ bool `yaml:"no_mulmatq,omitempty" json:"no_mulmatq,omitempty"` + DraftModel string `yaml:"draft_model,omitempty" json:"draft_model,omitempty"` + NDraft int32 `yaml:"n_draft,omitempty" json:"n_draft,omitempty"` + Quantization string `yaml:"quantization,omitempty" json:"quantization,omitempty"` + LoadFormat string `yaml:"load_format,omitempty" json:"load_format,omitempty"` + GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization,omitempty" json:"gpu_memory_utilization,omitempty"` // vLLM + TrustRemoteCode bool `yaml:"trust_remote_code,omitempty" json:"trust_remote_code,omitempty"` // vLLM + EnforceEager bool `yaml:"enforce_eager,omitempty" json:"enforce_eager,omitempty"` // vLLM + SwapSpace int `yaml:"swap_space,omitempty" json:"swap_space,omitempty"` // vLLM + MaxModelLen int `yaml:"max_model_len,omitempty" json:"max_model_len,omitempty"` // vLLM + TensorParallelSize int `yaml:"tensor_parallel_size,omitempty" json:"tensor_parallel_size,omitempty"` // vLLM + DisableLogStatus bool `yaml:"disable_log_stats,omitempty" json:"disable_log_stats,omitempty"` // vLLM + DType string `yaml:"dtype,omitempty" json:"dtype,omitempty"` // vLLM + LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt,omitempty" json:"limit_mm_per_prompt,omitempty"` // vLLM + MMProj string `yaml:"mmproj,omitempty" json:"mmproj,omitempty"` + + FlashAttention *string `yaml:"flash_attention,omitempty" json:"flash_attention,omitempty"` + NoKVOffloading bool `yaml:"no_kv_offloading,omitempty" json:"no_kv_offloading,omitempty"` + CacheTypeK string `yaml:"cache_type_k,omitempty" json:"cache_type_k,omitempty"` + CacheTypeV string `yaml:"cache_type_v,omitempty" json:"cache_type_v,omitempty"` + + RopeScaling string `yaml:"rope_scaling,omitempty" json:"rope_scaling,omitempty"` + ModelType string `yaml:"type,omitempty" json:"type,omitempty"` + + YarnExtFactor float32 `yaml:"yarn_ext_factor,omitempty" json:"yarn_ext_factor,omitempty"` + YarnAttnFactor float32 `yaml:"yarn_attn_factor,omitempty" json:"yarn_attn_factor,omitempty"` + YarnBetaFast float32 `yaml:"yarn_beta_fast,omitempty" json:"yarn_beta_fast,omitempty"` + YarnBetaSlow float32 `yaml:"yarn_beta_slow,omitempty" json:"yarn_beta_slow,omitempty"` + + CFGScale float32 `yaml:"cfg_scale,omitempty" json:"cfg_scale,omitempty"` // Classifier-Free Guidance Scale +} + +// @Description LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM type LimitMMPerPrompt struct { - LimitImagePerPrompt int `yaml:"image" json:"image"` - LimitVideoPerPrompt int `yaml:"video" json:"video"` - LimitAudioPerPrompt int `yaml:"audio" json:"audio"` + LimitImagePerPrompt int `yaml:"image,omitempty" json:"image,omitempty"` + LimitVideoPerPrompt int `yaml:"video,omitempty" json:"video,omitempty"` + LimitAudioPerPrompt int `yaml:"audio,omitempty" json:"audio,omitempty"` } -// TemplateConfig is a struct that holds the configuration of the templating system +// @Description TemplateConfig is a struct that holds the configuration of the templating system type TemplateConfig struct { // Chat is the template used in the chat completion endpoint - Chat string `yaml:"chat" json:"chat"` + Chat string `yaml:"chat,omitempty" json:"chat,omitempty"` // ChatMessage is the template used for chat messages - ChatMessage string `yaml:"chat_message" json:"chat_message"` + ChatMessage string `yaml:"chat_message,omitempty" json:"chat_message,omitempty"` // Completion is the template used for completion requests - Completion string `yaml:"completion" json:"completion"` + Completion string `yaml:"completion,omitempty" json:"completion,omitempty"` // Edit is the template used for edit completion requests - Edit string `yaml:"edit" json:"edit"` + Edit string `yaml:"edit,omitempty" json:"edit,omitempty"` // Functions is the template used when tools are present in the client requests - Functions string `yaml:"function" json:"function"` + Functions string `yaml:"function,omitempty" json:"function,omitempty"` // UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used. // Note: this is mostly consumed for backends such as vllm and transformers // that can use the tokenizers specified in the JSON config files of the models - UseTokenizerTemplate bool `yaml:"use_tokenizer_template" json:"use_tokenizer_template"` + UseTokenizerTemplate bool `yaml:"use_tokenizer_template,omitempty" json:"use_tokenizer_template,omitempty"` // JoinChatMessagesByCharacter is a string that will be used to join chat messages together. // It defaults to \n - JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character" json:"join_chat_messages_by_character"` + JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character,omitempty" json:"join_chat_messages_by_character,omitempty"` - Multimodal string `yaml:"multimodal" json:"multimodal"` + Multimodal string `yaml:"multimodal,omitempty" json:"multimodal,omitempty"` - ReplyPrefix string `yaml:"reply_prefix" json:"reply_prefix"` + ReplyPrefix string `yaml:"reply_prefix,omitempty" json:"reply_prefix,omitempty"` } func (c *ModelConfig) syncKnownUsecasesFromString() { diff --git a/core/schema/prediction.go b/core/schema/prediction.go index a75c7ab160af..40345ba50b14 100644 --- a/core/schema/prediction.go +++ b/core/schema/prediction.go @@ -1,50 +1,51 @@ package schema +// @Description PredictionOptions contains prediction parameters for model inference type PredictionOptions struct { // Also part of the OpenAI official spec BasicModelRequest `yaml:",inline"` // Also part of the OpenAI official spec - Language string `json:"language"` + Language string `json:"language,omitempty" yaml:"language,omitempty"` // Only for audio transcription - Translate bool `json:"translate"` + Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"` // Also part of the OpenAI official spec. use it for returning multiple results - N int `json:"n"` + N int `json:"n,omitempty" yaml:"n,omitempty"` // Common options between all the API calls, part of the OpenAI spec - TopP *float64 `json:"top_p" yaml:"top_p"` - TopK *int `json:"top_k" yaml:"top_k"` - Temperature *float64 `json:"temperature" yaml:"temperature"` - Maxtokens *int `json:"max_tokens" yaml:"max_tokens"` - Echo bool `json:"echo"` + TopP *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"` + TopK *int `json:"top_k,omitempty" yaml:"top_k,omitempty"` + Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"` + Maxtokens *int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"` + Echo bool `json:"echo,omitempty" yaml:"echo,omitempty"` // Custom parameters - not present in the OpenAI API - Batch int `json:"batch" yaml:"batch"` - IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"` - RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"` + Batch int `json:"batch,omitempty" yaml:"batch,omitempty"` + IgnoreEOS bool `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"` + RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"` - RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"` + RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"` - Keep int `json:"n_keep" yaml:"n_keep"` + Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"` - FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"` - PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"` - TFZ *float64 `json:"tfz" yaml:"tfz"` + FrequencyPenalty float64 `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"` + PresencePenalty float64 `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"` + TFZ *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"` - TypicalP *float64 `json:"typical_p" yaml:"typical_p"` - Seed *int `json:"seed" yaml:"seed"` + TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"` + Seed *int `json:"seed,omitempty" yaml:"seed,omitempty"` - NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"` - RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"` - RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"` - NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"` + NegativePrompt string `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"` + RopeFreqBase float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"` + RopeFreqScale float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"` + NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"` // Diffusers - ClipSkip int `json:"clip_skip" yaml:"clip_skip"` + ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"` // RWKV (?) - Tokenizer string `json:"tokenizer" yaml:"tokenizer"` + Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"` } diff --git a/core/schema/request.go b/core/schema/request.go index f55f39200ca4..8998a2736950 100644 --- a/core/schema/request.go +++ b/core/schema/request.go @@ -5,8 +5,9 @@ type LocalAIRequest interface { ModelName(*string) string } +// @Description BasicModelRequest contains the basic model request fields type BasicModelRequest struct { - Model string `json:"model" yaml:"model"` + Model string `json:"model,omitempty" yaml:"model,omitempty"` // TODO: Should this also include the following fields from the OpenAI side of the world? // If so, changes should be made to core/http/middleware/request.go to match diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 49c4970a7609..66a1e81ca8ab 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -13,99 +13,102 @@ import ( "github.com/rs/zerolog/log" ) +// @Description GrammarConfig contains configuration for grammar parsing type GrammarConfig struct { // ParallelCalls enables the LLM to return multiple function calls in the same response - ParallelCalls bool `yaml:"parallel_calls"` + ParallelCalls bool `yaml:"parallel_calls,omitempty" json:"parallel_calls,omitempty"` - DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"` + DisableParallelNewLines bool `yaml:"disable_parallel_new_lines,omitempty" json:"disable_parallel_new_lines,omitempty"` // MixedMode enables the LLM to return strings and not only JSON objects // This is useful for models to not constraining returning only JSON and also messages back to the user - MixedMode bool `yaml:"mixed_mode"` + MixedMode bool `yaml:"mixed_mode,omitempty" json:"mixed_mode,omitempty"` // NoMixedFreeString disables the mixed mode for free strings // In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects. // For example, if enabled the LLM or returns a JSON object or a free string, but not a mix of both // If disabled(default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but its not going to be strict - NoMixedFreeString bool `yaml:"no_mixed_free_string"` + NoMixedFreeString bool `yaml:"no_mixed_free_string,omitempty" json:"no_mixed_free_string,omitempty"` // NoGrammar disables the grammar parsing and parses the responses directly from the LLM - NoGrammar bool `yaml:"disable"` + NoGrammar bool `yaml:"disable,omitempty" json:"disable,omitempty"` // Prefix is the suffix to append to the grammar when being generated // This is useful when models prepend a tag before returning JSON - Prefix string `yaml:"prefix"` + Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"` // ExpectStringsAfterJSON enables mixed string suffix - ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"` + ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json,omitempty" json:"expect_strings_after_json,omitempty"` // PropOrder selects what order to print properties // for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } } // instead of { "arguments": { "bar": "baz" }, "name": "foo" } - PropOrder string `yaml:"properties_order"` + PropOrder string `yaml:"properties_order,omitempty" json:"properties_order,omitempty"` // SchemaType can be configured to use a specific schema type to force the grammar // available : json, llama3.1 - SchemaType string `yaml:"schema_type"` + SchemaType string `yaml:"schema_type,omitempty" json:"schema_type,omitempty"` - GrammarTriggers []GrammarTrigger `yaml:"triggers"` + GrammarTriggers []GrammarTrigger `yaml:"triggers,omitempty" json:"triggers,omitempty"` } +// @Description GrammarTrigger defines a trigger word for grammar parsing type GrammarTrigger struct { // Trigger is the string that triggers the grammar - Word string `yaml:"word"` + Word string `yaml:"word,omitempty" json:"word,omitempty"` } -// FunctionsConfig is the configuration for the tool/function call. +// @Description FunctionsConfig is the configuration for the tool/function call. // It includes setting to map the function name and arguments from the response // and, for instance, also if processing the requests with BNF grammars. type FunctionsConfig struct { // DisableNoAction disables the "no action" tool // By default we inject a tool that does nothing and is used to return an answer from the LLM - DisableNoAction bool `yaml:"disable_no_action"` + DisableNoAction bool `yaml:"disable_no_action,omitempty" json:"disable_no_action,omitempty"` // Grammar is the configuration for the grammar - GrammarConfig GrammarConfig `yaml:"grammar"` + GrammarConfig GrammarConfig `yaml:"grammar,omitempty" json:"grammar,omitempty"` // NoActionFunctionName is the name of the function that does nothing. It defaults to "answer" - NoActionFunctionName string `yaml:"no_action_function_name"` + NoActionFunctionName string `yaml:"no_action_function_name,omitempty" json:"no_action_function_name,omitempty"` // NoActionDescriptionName is the name of the function that returns the description of the no action function - NoActionDescriptionName string `yaml:"no_action_description_name"` + NoActionDescriptionName string `yaml:"no_action_description_name,omitempty" json:"no_action_description_name,omitempty"` // ResponseRegex is a named regex to extract the function name and arguments from the response - ResponseRegex []string `yaml:"response_regex"` + ResponseRegex []string `yaml:"response_regex,omitempty" json:"response_regex,omitempty"` // JSONRegexMatch is a regex to extract the JSON object from the response - JSONRegexMatch []string `yaml:"json_regex_match"` + JSONRegexMatch []string `yaml:"json_regex_match,omitempty" json:"json_regex_match,omitempty"` // ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments. - ArgumentRegex []string `yaml:"argument_regex"` + ArgumentRegex []string `yaml:"argument_regex,omitempty" json:"argument_regex,omitempty"` // ArgumentRegex named regex names for key and value extractions. default: key and value - ArgumentRegexKey string `yaml:"argument_regex_key_name"` // default: key - ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value + ArgumentRegexKey string `yaml:"argument_regex_key_name,omitempty" json:"argument_regex_key_name,omitempty"` // default: key + ArgumentRegexValue string `yaml:"argument_regex_value_name,omitempty" json:"argument_regex_value_name,omitempty"` // default: value // ReplaceFunctionResults allow to replace strings in the results before parsing them - ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"` + ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results,omitempty" json:"replace_function_results,omitempty"` // ReplaceLLMResult allow to replace strings in the results before parsing them - ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"` + ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results,omitempty" json:"replace_llm_results,omitempty"` // CaptureLLMResult is a regex to extract a string from the LLM response // that is used as return string when using tools. // This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back - CaptureLLMResult []string `yaml:"capture_llm_results"` + CaptureLLMResult []string `yaml:"capture_llm_results,omitempty" json:"capture_llm_results,omitempty"` // FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } } // instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }. // This might be useful for certain models trained with the function name as the first token. - FunctionNameKey string `yaml:"function_name_key"` - FunctionArgumentsKey string `yaml:"function_arguments_key"` + FunctionNameKey string `yaml:"function_name_key,omitempty" json:"function_name_key,omitempty"` + FunctionArgumentsKey string `yaml:"function_arguments_key,omitempty" json:"function_arguments_key,omitempty"` } +// @Description ReplaceResult defines a key-value replacement for function results type ReplaceResult struct { - Key string `yaml:"key"` - Value string `yaml:"value"` + Key string `yaml:"key,omitempty" json:"key,omitempty"` + Value string `yaml:"value,omitempty" json:"value,omitempty"` } type FuncCallResults struct { From 2f4f8bd46da3bdeab804ea4ec39ae918dedf7d89 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 09:38:42 +0100 Subject: [PATCH 08/16] Fix tests Signed-off-by: Ettore Di Giacinto --- pkg/huggingface-api/client_test.go | 56 +++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/pkg/huggingface-api/client_test.go b/pkg/huggingface-api/client_test.go index 37a381416125..ffa35bb90a94 100644 --- a/pkg/huggingface-api/client_test.go +++ b/pkg/huggingface-api/client_test.go @@ -270,6 +270,15 @@ var _ = Describe("HuggingFace API Client", func() { }) }) + Context("when getting file SHA on remote model", func() { + It("should get file SHA successfully", func() { + sha, err := client.GetFileSHA( + "mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF", "localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf") + Expect(err).ToNot(HaveOccurred()) + Expect(sha).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4")) + }) + }) + Context("when listing files", func() { BeforeEach(func() { mockFilesResponse := `[ @@ -329,23 +338,25 @@ var _ = Describe("HuggingFace API Client", func() { Context("when getting file SHA", func() { BeforeEach(func() { - mockFileInfoResponse := `{ - "path": "model-Q4_K_M.gguf", - "size": 1000000, - "oid": "abc123", - "lfs": { - "oid": "sha256:def456", + mockFilesResponse := `[ + { + "type": "file", + "path": "model-Q4_K_M.gguf", "size": 1000000, - "pointer": "version https://git-lfs.github.com/spec/v1", - "sha256": "def456789" + "oid": "abc123", + "lfs": { + "oid": "def456789", + "size": 1000000, + "pointerSize": 135 + } } - }` + ]` server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if strings.Contains(r.URL.Path, "/paths-info") { + if strings.Contains(r.URL.Path, "/tree/main") { w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - w.Write([]byte(mockFileInfoResponse)) + w.Write([]byte(mockFilesResponse)) } else { w.WriteHeader(http.StatusNotFound) } @@ -363,18 +374,29 @@ var _ = Describe("HuggingFace API Client", func() { It("should handle missing SHA gracefully", func() { server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - w.Write([]byte(`{"path": "file.txt", "size": 100}`)) + if strings.Contains(r.URL.Path, "/tree/main") { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + w.Write([]byte(`[ + { + "type": "file", + "path": "file.txt", + "size": 100, + "oid": "file123" + } + ]`)) + } else { + w.WriteHeader(http.StatusNotFound) + } })) client.SetBaseURL(server.URL) sha, err := client.GetFileSHA("test/model", "file.txt") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("no SHA256 found")) - Expect(sha).To(Equal("")) + Expect(err).ToNot(HaveOccurred()) + // When there's no LFS, it should return the OID + Expect(sha).To(Equal("file123")) }) }) From 8937998c744dc752af88da850e0605e2055ef3bb Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 09:44:49 +0100 Subject: [PATCH 09/16] Add MLX importer Signed-off-by: Ettore Di Giacinto --- core/gallery/importers/importers.go | 1 + core/gallery/importers/mlx.go | 94 +++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 core/gallery/importers/mlx.go diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go index 5236af0cc245..532d8e68c9f5 100644 --- a/core/gallery/importers/importers.go +++ b/core/gallery/importers/importers.go @@ -7,6 +7,7 @@ import ( var DefaultImporters = []Importer{ &LlamaCPPImporter{}, + &MLXImporter{}, } type Importer interface { diff --git a/core/gallery/importers/mlx.go b/core/gallery/importers/mlx.go new file mode 100644 index 000000000000..a8e183791873 --- /dev/null +++ b/core/gallery/importers/mlx.go @@ -0,0 +1,94 @@ +package importers + +import ( + "encoding/json" + "path/filepath" + "strings" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/schema" + "go.yaml.in/yaml/v2" +) + +var _ Importer = &MLXImporter{} + +type MLXImporter struct{} + +func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool { + preferences, err := request.Preferences.MarshalJSON() + if err != nil { + return false + } + preferencesMap := make(map[string]any) + err = json.Unmarshal(preferences, &preferencesMap) + if err != nil { + return false + } + + b, ok := preferencesMap["backend"].(string) + if ok && b == "mlx" || b == "mlx-vlm" { + return true + } + + // All https://huggingface.co/mlx-community/* + if strings.Contains(uri, "mlx-community/") { + return true + } + + return false +} + +func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) { + preferences, err := request.Preferences.MarshalJSON() + if err != nil { + return gallery.ModelConfig{}, err + } + preferencesMap := make(map[string]any) + err = json.Unmarshal(preferences, &preferencesMap) + if err != nil { + return gallery.ModelConfig{}, err + } + + name, ok := preferencesMap["name"].(string) + if !ok { + name = filepath.Base(uri) + } + + description, ok := preferencesMap["description"].(string) + if !ok { + description = "Imported from " + uri + } + + backend := "mlx" + b, ok := preferencesMap["backend"].(string) + if ok { + backend = b + } + + modelConfig := config.ModelConfig{ + Name: name, + Description: description, + KnownUsecaseStrings: []string{"chat"}, + Backend: backend, + PredictionOptions: schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{ + Model: uri, + }, + }, + TemplateConfig: config.TemplateConfig{ + UseTokenizerTemplate: true, + }, + } + + data, err := yaml.Marshal(modelConfig) + if err != nil { + return gallery.ModelConfig{}, err + } + + return gallery.ModelConfig{ + Name: name, + Description: description, + ConfigFile: string(data), + }, nil +} From dc44c57c80e91f6073807eac5b865578479064ec Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 12:53:02 +0100 Subject: [PATCH 10/16] Small refactors to star to use HF for discovery Signed-off-by: Ettore Di Giacinto --- core/gallery/importers/importers.go | 47 +++++++++++++++++++-- core/gallery/importers/llama-cpp.go | 20 ++++----- core/gallery/importers/mlx.go | 16 +++---- core/http/endpoints/localai/import_model.go | 14 ++---- core/startup/model_preload.go | 25 +++++------ 5 files changed, 75 insertions(+), 47 deletions(-) diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go index 532d8e68c9f5..f87bc5b81a8e 100644 --- a/core/gallery/importers/importers.go +++ b/core/gallery/importers/importers.go @@ -1,8 +1,12 @@ package importers import ( + "encoding/json" + + "github.com/rs/zerolog/log" + "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/core/schema" + hfapi "github.com/mudler/LocalAI/pkg/huggingface-api" ) var DefaultImporters = []Importer{ @@ -10,7 +14,44 @@ var DefaultImporters = []Importer{ &MLXImporter{}, } +type Details struct { + HuggingFace *hfapi.ModelDetails + URI string + Preferences json.RawMessage +} + type Importer interface { - Match(uri string, request schema.ImportModelRequest) bool - Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) + Match(details Details) bool + Import(details Details) (gallery.ModelConfig, error) +} + +func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) { + var err error + var modelConfig gallery.ModelConfig + + hf := hfapi.NewClient() + + hfDetails, err := hf.GetModelDetails(uri) + if err != nil { + // maybe not a HF repository + // TODO: maybe we can check if the URI is a valid HF repository + log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository") + } + + details := Details{ + HuggingFace: hfDetails, + URI: uri, + Preferences: preferences, + } + + for _, importer := range DefaultImporters { + if importer.Match(details) { + modelConfig, err = importer.Import(details) + if err != nil { + continue + } + break + } + } + return modelConfig, err } diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go index 3596c868f50a..d84500fa19e6 100644 --- a/core/gallery/importers/llama-cpp.go +++ b/core/gallery/importers/llama-cpp.go @@ -16,8 +16,8 @@ var _ Importer = &LlamaCPPImporter{} type LlamaCPPImporter struct{} -func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool { - preferences, err := request.Preferences.MarshalJSON() +func (i *LlamaCPPImporter) Match(details Details) bool { + preferences, err := details.Preferences.MarshalJSON() if err != nil { return false } @@ -31,11 +31,11 @@ func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) return true } - return strings.HasSuffix(uri, ".gguf") + return strings.HasSuffix(details.URI, ".gguf") } -func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) { - preferences, err := request.Preferences.MarshalJSON() +func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) { + preferences, err := details.Preferences.MarshalJSON() if err != nil { return gallery.ModelConfig{}, err } @@ -47,12 +47,12 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) name, ok := preferencesMap["name"].(string) if !ok { - name = filepath.Base(uri) + name = filepath.Base(details.URI) } description, ok := preferencesMap["description"].(string) if !ok { - description = "Imported from " + uri + description = "Imported from " + details.URI } modelConfig := config.ModelConfig{ @@ -62,7 +62,7 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) Backend: "llama-cpp", PredictionOptions: schema.PredictionOptions{ BasicModelRequest: schema.BasicModelRequest{ - Model: filepath.Base(uri), + Model: filepath.Base(details.URI), }, }, TemplateConfig: config.TemplateConfig{ @@ -86,8 +86,8 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) ConfigFile: string(data), Files: []gallery.File{ { - URI: uri, - Filename: filepath.Base(uri), + URI: details.URI, + Filename: filepath.Base(details.URI), }, }, }, nil diff --git a/core/gallery/importers/mlx.go b/core/gallery/importers/mlx.go index a8e183791873..faa28846f4ea 100644 --- a/core/gallery/importers/mlx.go +++ b/core/gallery/importers/mlx.go @@ -15,8 +15,8 @@ var _ Importer = &MLXImporter{} type MLXImporter struct{} -func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool { - preferences, err := request.Preferences.MarshalJSON() +func (i *MLXImporter) Match(details Details) bool { + preferences, err := details.Preferences.MarshalJSON() if err != nil { return false } @@ -32,15 +32,15 @@ func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool } // All https://huggingface.co/mlx-community/* - if strings.Contains(uri, "mlx-community/") { + if strings.Contains(details.URI, "mlx-community/") { return true } return false } -func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) { - preferences, err := request.Preferences.MarshalJSON() +func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) { + preferences, err := details.Preferences.MarshalJSON() if err != nil { return gallery.ModelConfig{}, err } @@ -52,12 +52,12 @@ func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gal name, ok := preferencesMap["name"].(string) if !ok { - name = filepath.Base(uri) + name = filepath.Base(details.URI) } description, ok := preferencesMap["description"].(string) if !ok { - description = "Imported from " + uri + description = "Imported from " + details.URI } backend := "mlx" @@ -73,7 +73,7 @@ func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gal Backend: backend, PredictionOptions: schema.PredictionOptions{ BasicModelRequest: schema.BasicModelRequest{ - Model: uri, + Model: details.URI, }, }, TemplateConfig: config.TemplateConfig{ diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index ea71ae53ae5d..b6d34055390f 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -30,17 +30,9 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl return err } - var err error - var modelConfig gallery.ModelConfig - - for _, importer := range importers.DefaultImporters { - if importer.Match(input.URI, *input) { - modelConfig, err = importer.Import(input.URI, *input) - if err != nil { - continue - } - break - } + modelConfig, err := importers.DiscoverModelConfig(input.URI, input.Preferences) + if err != nil { + return fmt.Errorf("failed to discover model config: %w", err) } uuid, err := uuid.NewUUID() diff --git a/core/startup/model_preload.go b/core/startup/model_preload.go index 4550a25e46f3..193aad6a2e52 100644 --- a/core/startup/model_preload.go +++ b/core/startup/model_preload.go @@ -1,6 +1,7 @@ package startup import ( + "encoding/json" "errors" "fmt" "os" @@ -13,7 +14,6 @@ import ( "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/gallery/importers" - "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/model" @@ -165,22 +165,17 @@ func InstallModels(galleryService *services.GalleryService, galleries, backendGa continue } - // TODO: start autoimporter - var err error - var modelConfig gallery.ModelConfig - for _, importer := range importers.DefaultImporters { - if importer.Match(url, schema.ImportModelRequest{}) { - modelConfig, err = importer.Import(url, schema.ImportModelRequest{}) - if err != nil { - continue - } - break - } + // TODO: we should just use the discoverModelConfig here and default to this. + modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{}) + if discoverErr != nil { + err = errors.Join(discoverErr, fmt.Errorf("failed to discover model config: %w", err)) + continue } - uuid, err := uuid.NewUUID() - if err != nil { - return err + uuid, uuidErr := uuid.NewUUID() + if uuidErr != nil { + err = errors.Join(uuidErr, fmt.Errorf("failed to generate UUID: %w", uuidErr)) + continue } galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ From 71f2163459c932b521cdd1fc9eba24fceb6908ef Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 16:05:27 +0100 Subject: [PATCH 11/16] Add tests Signed-off-by: Ettore Di Giacinto --- .../gallery/importers/importers_suite_test.go | 13 ++ core/gallery/importers/llama-cpp_test.go | 128 +++++++++++++++ core/gallery/importers/mlx_test.go | 147 ++++++++++++++++++ 3 files changed, 288 insertions(+) create mode 100644 core/gallery/importers/importers_suite_test.go create mode 100644 core/gallery/importers/llama-cpp_test.go create mode 100644 core/gallery/importers/mlx_test.go diff --git a/core/gallery/importers/importers_suite_test.go b/core/gallery/importers/importers_suite_test.go new file mode 100644 index 000000000000..a65b8163ad56 --- /dev/null +++ b/core/gallery/importers/importers_suite_test.go @@ -0,0 +1,13 @@ +package importers_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestImporters(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Importers test suite") +} diff --git a/core/gallery/importers/llama-cpp_test.go b/core/gallery/importers/llama-cpp_test.go new file mode 100644 index 000000000000..19f25e0a1dc6 --- /dev/null +++ b/core/gallery/importers/llama-cpp_test.go @@ -0,0 +1,128 @@ +package importers_test + +import ( + "encoding/json" + + "github.com/mudler/LocalAI/core/gallery/importers" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("LlamaCPPImporter", func() { + var importer *importers.LlamaCPPImporter + + BeforeEach(func() { + importer = &importers.LlamaCPPImporter{} + }) + + Context("Match", func() { + It("should match when URI ends with .gguf", func() { + details := importers.Details{ + URI: "https://example.com/model.gguf", + } + + result := importer.Match(details) + Expect(result).To(BeTrue()) + }) + + It("should match when backend preference is llama-cpp", func() { + preferences := json.RawMessage(`{"backend": "llama-cpp"}`) + details := importers.Details{ + URI: "https://example.com/model", + Preferences: preferences, + } + + result := importer.Match(details) + Expect(result).To(BeTrue()) + }) + + It("should not match when URI does not end with .gguf and no backend preference", func() { + details := importers.Details{ + URI: "https://example.com/model.bin", + } + + result := importer.Match(details) + Expect(result).To(BeFalse()) + }) + + It("should not match when backend preference is different", func() { + preferences := json.RawMessage(`{"backend": "mlx"}`) + details := importers.Details{ + URI: "https://example.com/model", + Preferences: preferences, + } + + result := importer.Match(details) + Expect(result).To(BeFalse()) + }) + + It("should return false when JSON preferences are invalid", func() { + preferences := json.RawMessage(`invalid json`) + details := importers.Details{ + URI: "https://example.com/model.gguf", + Preferences: preferences, + } + + // Invalid JSON causes Match to return false early + result := importer.Match(details) + Expect(result).To(BeFalse()) + }) + }) + + Context("Import", func() { + It("should import model config with default name and description", func() { + details := importers.Details{ + URI: "https://example.com/my-model.gguf", + } + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("my-model.gguf")) + Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf")) + Expect(modelConfig.Files).To(HaveLen(1)) + Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf")) + Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: my-model.gguf")) + }) + + It("should import model config with custom name and description from preferences", func() { + preferences := json.RawMessage(`{"name": "custom-model", "description": "Custom description"}`) + details := importers.Details{ + URI: "https://example.com/my-model.gguf", + Preferences: preferences, + } + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("custom-model")) + Expect(modelConfig.Description).To(Equal("Custom description")) + Expect(modelConfig.Files).To(HaveLen(1)) + }) + + It("should handle invalid JSON preferences", func() { + preferences := json.RawMessage(`invalid json`) + details := importers.Details{ + URI: "https://example.com/my-model.gguf", + Preferences: preferences, + } + + _, err := importer.Import(details) + Expect(err).To(HaveOccurred()) + }) + + It("should extract filename correctly from URI with path", func() { + details := importers.Details{ + URI: "https://example.com/path/to/model.gguf", + } + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: model.gguf")) + }) + }) +}) diff --git a/core/gallery/importers/mlx_test.go b/core/gallery/importers/mlx_test.go new file mode 100644 index 000000000000..82e02aff0b44 --- /dev/null +++ b/core/gallery/importers/mlx_test.go @@ -0,0 +1,147 @@ +package importers_test + +import ( + "encoding/json" + + "github.com/mudler/LocalAI/core/gallery/importers" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("MLXImporter", func() { + var importer *importers.MLXImporter + + BeforeEach(func() { + importer = &importers.MLXImporter{} + }) + + Context("Match", func() { + It("should match when URI contains mlx-community/", func() { + details := importers.Details{ + URI: "https://huggingface.co/mlx-community/test-model", + } + + result := importer.Match(details) + Expect(result).To(BeTrue()) + }) + + It("should match when backend preference is mlx", func() { + preferences := json.RawMessage(`{"backend": "mlx"}`) + details := importers.Details{ + URI: "https://example.com/model", + Preferences: preferences, + } + + result := importer.Match(details) + Expect(result).To(BeTrue()) + }) + + It("should match when backend preference is mlx-vlm", func() { + preferences := json.RawMessage(`{"backend": "mlx-vlm"}`) + details := importers.Details{ + URI: "https://example.com/model", + Preferences: preferences, + } + + result := importer.Match(details) + Expect(result).To(BeTrue()) + }) + + It("should not match when URI does not contain mlx-community/ and no backend preference", func() { + details := importers.Details{ + URI: "https://huggingface.co/other-org/test-model", + } + + result := importer.Match(details) + Expect(result).To(BeFalse()) + }) + + It("should not match when backend preference is different", func() { + preferences := json.RawMessage(`{"backend": "llama-cpp"}`) + details := importers.Details{ + URI: "https://example.com/model", + Preferences: preferences, + } + + result := importer.Match(details) + Expect(result).To(BeFalse()) + }) + + It("should return false when JSON preferences are invalid", func() { + preferences := json.RawMessage(`invalid json`) + details := importers.Details{ + URI: "https://huggingface.co/mlx-community/test-model", + Preferences: preferences, + } + + // Invalid JSON causes Match to return false early + result := importer.Match(details) + Expect(result).To(BeFalse()) + }) + }) + + Context("Import", func() { + It("should import model config with default name and description", func() { + details := importers.Details{ + URI: "https://huggingface.co/mlx-community/test-model", + } + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("test-model")) + Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("model: https://huggingface.co/mlx-community/test-model")) + }) + + It("should import model config with custom name and description from preferences", func() { + preferences := json.RawMessage(`{"name": "custom-mlx-model", "description": "Custom MLX description"}`) + details := importers.Details{ + URI: "https://huggingface.co/mlx-community/test-model", + Preferences: preferences, + } + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("custom-mlx-model")) + Expect(modelConfig.Description).To(Equal("Custom MLX description")) + }) + + It("should use custom backend from preferences", func() { + preferences := json.RawMessage(`{"backend": "mlx-vlm"}`) + details := importers.Details{ + URI: "https://huggingface.co/mlx-community/test-model", + Preferences: preferences, + } + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm")) + }) + + It("should handle invalid JSON preferences", func() { + preferences := json.RawMessage(`invalid json`) + details := importers.Details{ + URI: "https://huggingface.co/mlx-community/test-model", + Preferences: preferences, + } + + _, err := importer.Import(details) + Expect(err).To(HaveOccurred()) + }) + + It("should extract filename correctly from URI with path", func() { + details := importers.Details{ + URI: "https://huggingface.co/mlx-community/path/to/model", + } + + modelConfig, err := importer.Import(details) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("model")) + }) + }) +}) From b8ed4467f0cb6f43338dbb594fea27a781f32bb8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 16:13:06 +0100 Subject: [PATCH 12/16] Common preferences Signed-off-by: Ettore Di Giacinto --- core/http/views/model-editor.html | 151 +++++++++++++++++++++++------- 1 file changed, 118 insertions(+), 33 deletions(-) diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html index f8db7cf66fae..68b26360c1b5 100644 --- a/core/http/views/model-editor.html +++ b/core/http/views/model-editor.html @@ -109,40 +109,107 @@

-

-
- + +
+

+ Common Preferences +

+ + +
+ + +

+ Force a specific backend. Leave empty to auto-detect from URI. +

+
+ + +
+ + +

+ Custom name for the model. If empty, the filename will be used. +

+
+ + +
+ + +

+ Custom description for the model. If empty, a default description will be generated. +

+
+
+ + +
+
+ + +
+ +
+ +
+

+ Add custom key-value pairs for advanced configuration +

-

- Add key-value pairs to customize the import process -

@@ -354,6 +421,11 @@

isEditMode: {{if .ModelName}}true{{else}}false{{end}}, importUri: '', preferences: [], + commonPreferences: { + backend: '', + name: '', + description: '' + }, isSubmitting: false, currentJobId: null, jobPollInterval: null, @@ -402,11 +474,24 @@

this.isSubmitting = true; try { - // Build preferences object + // Build preferences object starting with common preferences const prefsObj = {}; + + // Add common preferences (only non-empty values) + if (this.commonPreferences.backend && this.commonPreferences.backend.trim()) { + prefsObj.backend = this.commonPreferences.backend.trim(); + } + if (this.commonPreferences.name && this.commonPreferences.name.trim()) { + prefsObj.name = this.commonPreferences.name.trim(); + } + if (this.commonPreferences.description && this.commonPreferences.description.trim()) { + prefsObj.description = this.commonPreferences.description.trim(); + } + + // Add custom preferences (can override common ones) this.preferences.forEach(pref => { if (pref.key && pref.value) { - prefsObj[pref.key] = pref.value; + prefsObj[pref.key.trim()] = pref.value.trim(); } }); From 9cd627362efb0695d22fbe6635b26d50c970768c Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 17:41:04 +0100 Subject: [PATCH 13/16] Add support to bare HF repos Signed-off-by: Ettore Di Giacinto --- core/gallery/importers/importers.go | 10 +- core/gallery/importers/importers_test.go | 173 +++++++++++++++++++++++ core/gallery/importers/llama-cpp.go | 76 +++++++--- core/gallery/importers/llama-cpp_test.go | 17 ++- core/http/views/model-editor.html | 22 ++- pkg/huggingface-api/client.go | 7 + pkg/huggingface-api/client_test.go | 8 ++ 7 files changed, 287 insertions(+), 26 deletions(-) create mode 100644 core/gallery/importers/importers_test.go diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go index f87bc5b81a8e..0be81ed06d0d 100644 --- a/core/gallery/importers/importers.go +++ b/core/gallery/importers/importers.go @@ -2,6 +2,7 @@ package importers import ( "encoding/json" + "strings" "github.com/rs/zerolog/log" @@ -31,11 +32,18 @@ func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.Model hf := hfapi.NewClient() - hfDetails, err := hf.GetModelDetails(uri) + hfrepoID := strings.ReplaceAll(uri, "huggingface://", "") + hfrepoID = strings.ReplaceAll(hfrepoID, "hf://", "") + hfrepoID = strings.ReplaceAll(hfrepoID, "https://huggingface.co/", "") + + hfDetails, err := hf.GetModelDetails(hfrepoID) if err != nil { // maybe not a HF repository // TODO: maybe we can check if the URI is a valid HF repository log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository") + } else { + log.Debug().Str("uri", uri).Msg("Got model details") + log.Debug().Any("details", hfDetails).Msg("Model details") } details := Details{ diff --git a/core/gallery/importers/importers_test.go b/core/gallery/importers/importers_test.go new file mode 100644 index 000000000000..48fb6ac3d46a --- /dev/null +++ b/core/gallery/importers/importers_test.go @@ -0,0 +1,173 @@ +package importers_test + +import ( + "encoding/json" + "fmt" + + "github.com/mudler/LocalAI/core/gallery/importers" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("DiscoverModelConfig", func() { + + Context("With only a repository URI", func() { + It("should discover and import using LlamaCPPImporter", func() { + uri := "https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err)) + Expect(modelConfig.Name).To(Equal("LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].Filename).To(Equal("localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig)) + }) + }) + + Context("with .gguf URI", func() { + It("should discover and import using LlamaCPPImporter", func() { + uri := "https://example.com/my-model.gguf" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("my-model.gguf")) + Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp")) + }) + + It("should use custom preferences when provided", func() { + uri := "https://example.com/my-model.gguf" + preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description"}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("custom-name")) + Expect(modelConfig.Description).To(Equal("Custom description")) + }) + }) + + Context("with mlx-community URI", func() { + It("should discover and import using MLXImporter", func() { + uri := "https://huggingface.co/mlx-community/test-model" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("test-model")) + Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model")) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx")) + }) + + It("should use custom preferences when provided", func() { + uri := "https://huggingface.co/mlx-community/test-model" + preferences := json.RawMessage(`{"name": "custom-mlx", "description": "Custom MLX description"}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("custom-mlx")) + Expect(modelConfig.Description).To(Equal("Custom MLX description")) + }) + }) + + Context("with backend preference", func() { + It("should use llama-cpp backend when specified", func() { + uri := "https://example.com/model" + preferences := json.RawMessage(`{"backend": "llama-cpp"}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp")) + }) + + It("should use mlx backend when specified", func() { + uri := "https://example.com/model" + preferences := json.RawMessage(`{"backend": "mlx"}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx")) + }) + + It("should use mlx-vlm backend when specified", func() { + uri := "https://example.com/model" + preferences := json.RawMessage(`{"backend": "mlx-vlm"}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm")) + }) + }) + + Context("with HuggingFace URI formats", func() { + It("should handle huggingface:// prefix", func() { + uri := "huggingface://mlx-community/test-model" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("test-model")) + }) + + It("should handle hf:// prefix", func() { + uri := "hf://mlx-community/test-model" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("test-model")) + }) + + It("should handle https://huggingface.co/ prefix", func() { + uri := "https://huggingface.co/mlx-community/test-model" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).ToNot(HaveOccurred()) + Expect(modelConfig.Name).To(Equal("test-model")) + }) + }) + + Context("with invalid or non-matching URI", func() { + It("should return error when no importer matches", func() { + uri := "https://example.com/unknown-model.bin" + preferences := json.RawMessage(`{}`) + + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + // When no importer matches, the function returns empty config and error + // The exact behavior depends on implementation, but typically an error is returned + Expect(modelConfig.Name).To(BeEmpty()) + Expect(err).To(HaveOccurred()) + }) + }) + + Context("with invalid JSON preferences", func() { + It("should return error when JSON is invalid even if URI matches", func() { + uri := "https://example.com/model.gguf" + preferences := json.RawMessage(`invalid json`) + + // Even though Match() returns true for .gguf extension, + // Import() will fail when trying to unmarshal invalid JSON preferences + modelConfig, err := importers.DiscoverModelConfig(uri, preferences) + + Expect(err).To(HaveOccurred()) + Expect(modelConfig.Name).To(BeEmpty()) + }) + }) +}) diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go index d84500fa19e6..d4c8ff591fdb 100644 --- a/core/gallery/importers/llama-cpp.go +++ b/core/gallery/importers/llama-cpp.go @@ -3,6 +3,7 @@ package importers import ( "encoding/json" "path/filepath" + "slices" "strings" "github.com/mudler/LocalAI/core/config" @@ -31,7 +32,19 @@ func (i *LlamaCPPImporter) Match(details Details) bool { return true } - return strings.HasSuffix(details.URI, ".gguf") + if strings.HasSuffix(details.URI, ".gguf") { + return true + } + + if details.HuggingFace != nil { + for _, file := range details.HuggingFace.Files { + if strings.HasSuffix(file.Path, ".gguf") { + return true + } + } + } + + return false } func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) { @@ -55,16 +68,17 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) description = "Imported from " + details.URI } + preferedQuantizations, _ := preferencesMap["quantizations"].(string) + quants := []string{"q4_k_m"} + if preferedQuantizations != "" { + quants = strings.Split(preferedQuantizations, ",") + } + modelConfig := config.ModelConfig{ Name: name, Description: description, KnownUsecaseStrings: []string{"chat"}, Backend: "llama-cpp", - PredictionOptions: schema.PredictionOptions{ - BasicModelRequest: schema.BasicModelRequest{ - Model: filepath.Base(details.URI), - }, - }, TemplateConfig: config.TemplateConfig{ UseTokenizerTemplate: true, }, @@ -75,20 +89,48 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) }, } + cfg := gallery.ModelConfig{ + Name: name, + Description: description, + } + + if strings.Contains(details.URI, ".gguf") { + cfg.Files = append(cfg.Files, gallery.File{ + URI: details.URI, + Filename: filepath.Base(details.URI), + }) + modelConfig.PredictionOptions = schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{ + Model: filepath.Base(details.URI), + }, + } + } else if details.HuggingFace != nil { + for _, file := range details.HuggingFace.Files { + if slices.ContainsFunc(quants, func(quant string) bool { + return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant)) + }) { + cfg.Files = append(cfg.Files, gallery.File{ + URI: file.URL, + Filename: filepath.Base(file.Path), + SHA256: file.SHA256, + }) + } + } + if len(modelConfig.DownloadFiles) > 0 { + modelConfig.PredictionOptions = schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{ + Model: modelConfig.DownloadFiles[0].Filename, + }, + } + } + } + data, err := yaml.Marshal(modelConfig) if err != nil { return gallery.ModelConfig{}, err } - return gallery.ModelConfig{ - Name: name, - Description: description, - ConfigFile: string(data), - Files: []gallery.File{ - { - URI: details.URI, - Filename: filepath.Base(details.URI), - }, - }, - }, nil + cfg.ConfigFile = string(data) + + return cfg, nil } diff --git a/core/gallery/importers/llama-cpp_test.go b/core/gallery/importers/llama-cpp_test.go index 19f25e0a1dc6..5a3c4d74f44d 100644 --- a/core/gallery/importers/llama-cpp_test.go +++ b/core/gallery/importers/llama-cpp_test.go @@ -2,6 +2,7 @@ package importers_test import ( "encoding/json" + "fmt" "github.com/mudler/LocalAI/core/gallery/importers" . "github.com/onsi/ginkgo/v2" @@ -80,11 +81,10 @@ var _ = Describe("LlamaCPPImporter", func() { Expect(err).ToNot(HaveOccurred()) Expect(modelConfig.Name).To(Equal("my-model.gguf")) Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf")) - Expect(modelConfig.Files).To(HaveLen(1)) - Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf")) - Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf")) Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp")) - Expect(modelConfig.ConfigFile).To(ContainSubstring("model: my-model.gguf")) + Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) }) It("should import model config with custom name and description from preferences", func() { @@ -99,7 +99,9 @@ var _ = Describe("LlamaCPPImporter", func() { Expect(err).ToNot(HaveOccurred()) Expect(modelConfig.Name).To(Equal("custom-model")) Expect(modelConfig.Description).To(Equal("Custom description")) - Expect(modelConfig.Files).To(HaveLen(1)) + Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) }) It("should handle invalid JSON preferences", func() { @@ -121,8 +123,9 @@ var _ = Describe("LlamaCPPImporter", func() { modelConfig, err := importer.Import(details) Expect(err).ToNot(HaveOccurred()) - Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf")) - Expect(modelConfig.ConfigFile).To(ContainSubstring("model: model.gguf")) + Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/path/to/model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) + Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig)) }) }) }) diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html index 68b26360c1b5..d18aa6cdb924 100644 --- a/core/http/views/model-editor.html +++ b/core/http/views/model-editor.html @@ -167,6 +167,22 @@

Custom description for the model. If empty, a default description will be generated.

+ + +
+ + +

+ Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m). +

+
@@ -424,7 +440,8 @@

commonPreferences: { backend: '', name: '', - description: '' + description: '', + quantizations: '' }, isSubmitting: false, currentJobId: null, @@ -487,6 +504,9 @@

if (this.commonPreferences.description && this.commonPreferences.description.trim()) { prefsObj.description = this.commonPreferences.description.trim(); } + if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) { + prefsObj.quantizations = this.commonPreferences.quantizations.trim(); + } // Add custom preferences (can override common ones) this.preferences.forEach(pref => { diff --git a/pkg/huggingface-api/client.go b/pkg/huggingface-api/client.go index 37fb52bb22c1..9b1959f1d857 100644 --- a/pkg/huggingface-api/client.go +++ b/pkg/huggingface-api/client.go @@ -51,6 +51,7 @@ type ModelFile struct { Size int64 SHA256 string IsReadme bool + URL string } // ModelDetails represents detailed information about a model @@ -215,6 +216,7 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) { } // Process each file + baseURL := strings.TrimSuffix(c.baseURL, "/api/models") for _, file := range files { fileName := filepath.Base(file.Path) isReadme := strings.Contains(strings.ToLower(fileName), "readme") @@ -227,11 +229,16 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) { sha256 = file.Oid } + // Construct the full URL for the file + // Use /resolve/main/ for downloading files (handles LFS properly) + fileURL := fmt.Sprintf("%s/%s/resolve/main/%s", baseURL, repoID, file.Path) + modelFile := ModelFile{ Path: file.Path, Size: file.Size, SHA256: sha256, IsReadme: isReadme, + URL: fileURL, } details.Files = append(details.Files, modelFile) diff --git a/pkg/huggingface-api/client_test.go b/pkg/huggingface-api/client_test.go index ffa35bb90a94..5ef5f7900930 100644 --- a/pkg/huggingface-api/client_test.go +++ b/pkg/huggingface-api/client_test.go @@ -1,6 +1,7 @@ package hfapi_test import ( + "fmt" "net/http" "net/http/httptest" "strings" @@ -461,6 +462,13 @@ var _ = Describe("HuggingFace API Client", func() { Expect(details.ReadmeFile).ToNot(BeNil()) Expect(details.ReadmeFile.Path).To(Equal("README.md")) Expect(details.ReadmeFile.IsReadme).To(BeTrue()) + + // Verify URLs are set for all files + baseURL := strings.TrimSuffix(server.URL, "/api/models") + for _, file := range details.Files { + expectedURL := fmt.Sprintf("%s/test/model/resolve/main/%s", baseURL, file.Path) + Expect(file.URL).To(Equal(expectedURL)) + } }) }) From c9f30fbc0f6e152e4c211fc5addb9174069e1046 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 18:26:43 +0100 Subject: [PATCH 14/16] feat(importer/llama.cpp): add support for mmproj files Signed-off-by: Ettore Di Giacinto --- core/gallery/importers/llama-cpp.go | 55 +++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go index d4c8ff591fdb..65bebf7d1966 100644 --- a/core/gallery/importers/llama-cpp.go +++ b/core/gallery/importers/llama-cpp.go @@ -69,11 +69,17 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) } preferedQuantizations, _ := preferencesMap["quantizations"].(string) - quants := []string{"q4_k_m"} + quants := []string{"q4_k_m", "q4_0", "q8_0", "f16"} if preferedQuantizations != "" { quants = strings.Split(preferedQuantizations, ",") } + mmprojQuants, _ := preferencesMap["mmproj_quantizations"].(string) + mmprojQuantsList := []string{"fp16"} + if mmprojQuants != "" { + mmprojQuantsList = strings.Split(mmprojQuants, ",") + } + modelConfig := config.ModelConfig{ Name: name, Description: description, @@ -105,7 +111,11 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) }, } } else if details.HuggingFace != nil { + lastMMProjFile := gallery.File{} + foundPreferedQuant := false + for _, file := range details.HuggingFace.Files { + // get the files of the prefered quants if slices.ContainsFunc(quants, func(quant string) bool { return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant)) }) { @@ -115,14 +125,53 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) SHA256: file.SHA256, }) } + // Get the mmproj prefered quants + if strings.Contains(strings.ToLower(file.Path), "mmproj") { + lastMMProjFile = gallery.File{ + URI: file.URL, + Filename: filepath.Base(file.Path), + SHA256: file.SHA256, + } + if slices.ContainsFunc(mmprojQuantsList, func(quant string) bool { + return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant)) + }) { + foundPreferedQuant = true + cfg.Files = append(cfg.Files, lastMMProjFile) + } + } } - if len(modelConfig.DownloadFiles) > 0 { + + if !foundPreferedQuant && lastMMProjFile.URI != "" { + cfg.Files = append(cfg.Files, lastMMProjFile) modelConfig.PredictionOptions = schema.PredictionOptions{ BasicModelRequest: schema.BasicModelRequest{ - Model: modelConfig.DownloadFiles[0].Filename, + Model: lastMMProjFile.Filename, }, } } + + // Find first mmproj file + for _, file := range cfg.Files { + if !strings.Contains(strings.ToLower(file.Filename), "mmproj") { + continue + } + modelConfig.MMProj = file.Filename + break + } + + // Find first non-mmproj file + for _, file := range cfg.Files { + if strings.Contains(strings.ToLower(file.Filename), "mmproj") { + continue + } + modelConfig.PredictionOptions = schema.PredictionOptions{ + BasicModelRequest: schema.BasicModelRequest{ + Model: file.Filename, + }, + } + break + } + } data, err := yaml.Marshal(modelConfig) From 0d02850f3ddbec347992a09267536652ad6535a3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 18:33:01 +0100 Subject: [PATCH 15/16] add mmproj quants to common preferences Signed-off-by: Ettore Di Giacinto --- core/http/views/model-editor.html | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html index d18aa6cdb924..d5b5fce764d7 100644 --- a/core/http/views/model-editor.html +++ b/core/http/views/model-editor.html @@ -183,6 +183,22 @@

Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).

+ + +
+ + +

+ Preferred MMProj quantizations (comma-separated). Examples: fp16, fp32. Leave empty to use default (fp16). +

+
@@ -441,7 +457,8 @@

backend: '', name: '', description: '', - quantizations: '' + quantizations: '', + mmproj_quantizations: '' }, isSubmitting: false, currentJobId: null, @@ -507,6 +524,9 @@

if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) { prefsObj.quantizations = this.commonPreferences.quantizations.trim(); } + if (this.commonPreferences.mmproj_quantizations && this.commonPreferences.mmproj_quantizations.trim()) { + prefsObj.mmproj_quantizations = this.commonPreferences.mmproj_quantizations.trim(); + } // Add custom preferences (can override common ones) this.preferences.forEach(pref => { From 9366052eb955f7f12446eda762f638a2eaee61d3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Wed, 12 Nov 2025 19:24:49 +0100 Subject: [PATCH 16/16] Fix vlm usage in tokenizer mode with llama.cpp Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama-cpp/grpc-server.cpp | 134 ++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 10 deletions(-) diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp index 790032d60316..17087fc060d0 100644 --- a/backend/cpp/llama-cpp/grpc-server.cpp +++ b/backend/cpp/llama-cpp/grpc-server.cpp @@ -590,17 +590,71 @@ class BackendServiceImpl final : public backend::Backend::Service { // Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse json body_json; json messages_json = json::array(); + + // Find the last user message index to attach images/audio to + int last_user_msg_idx = -1; + for (int i = request->messages_size() - 1; i >= 0; i--) { + if (request->messages(i).role() == "user") { + last_user_msg_idx = i; + break; + } + } + for (int i = 0; i < request->messages_size(); i++) { const auto& msg = request->messages(i); json msg_json; msg_json["role"] = msg.role(); + bool is_last_user_msg = (i == last_user_msg_idx); + bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0); + // Handle content - can be string, null, or array // For multimodal content, we'll embed images/audio from separate fields if (!msg.content().empty()) { - msg_json["content"] = msg.content(); - } else if (request->images_size() > 0 || request->audios_size() > 0) { - // If no content but has images/audio, create content array + // Try to parse content as JSON to see if it's already an array + json content_val; + try { + content_val = json::parse(msg.content()); + } catch (const json::parse_error&) { + // Not JSON, treat as plain string + content_val = msg.content(); + } + + // If content is a string and this is the last user message with images/audio, combine them + if (content_val.is_string() && is_last_user_msg && has_images_or_audio) { + json content_array = json::array(); + // Add text first + content_array.push_back({{"type", "text"}, {"text", content_val.get()}}); + // Add images + if (request->images_size() > 0) { + for (int j = 0; j < request->images_size(); j++) { + json image_chunk; + image_chunk["type"] = "image_url"; + json image_url; + image_url["url"] = "data:image/jpeg;base64," + request->images(j); + image_chunk["image_url"] = image_url; + content_array.push_back(image_chunk); + } + } + // Add audios + if (request->audios_size() > 0) { + for (int j = 0; j < request->audios_size(); j++) { + json audio_chunk; + audio_chunk["type"] = "input_audio"; + json input_audio; + input_audio["data"] = request->audios(j); + input_audio["format"] = "wav"; // default, could be made configurable + audio_chunk["input_audio"] = input_audio; + content_array.push_back(audio_chunk); + } + } + msg_json["content"] = content_array; + } else { + // Use content as-is (already array or not last user message) + msg_json["content"] = content_val; + } + } else if (is_last_user_msg && has_images_or_audio) { + // If no content but this is the last user message with images/audio, create content array json content_array = json::array(); if (request->images_size() > 0) { for (int j = 0; j < request->images_size(); j++) { @@ -718,6 +772,9 @@ class BackendServiceImpl final : public backend::Backend::Service { // Create parser options with current chat_templates to ensure tmpls is not null oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt; parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates + // Update allow_image and allow_audio based on current mctx state + parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false; + parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false; json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files); // Extract the prompt from parsed data @@ -758,7 +815,7 @@ class BackendServiceImpl final : public backend::Backend::Service { // If not using chat templates, extract files from image_data/audio_data fields // (If using chat templates, files were already extracted by oaicompat_chat_params_parse) - //if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) { + if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) { const auto &images_data = data.find("image_data"); if (images_data != data.end() && images_data->is_array()) { @@ -778,7 +835,7 @@ class BackendServiceImpl final : public backend::Backend::Service { files.push_back(decoded_data); } } - // } + } const bool has_mtmd = ctx_server.mctx != nullptr; @@ -917,17 +974,71 @@ class BackendServiceImpl final : public backend::Backend::Service { // Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse json body_json; json messages_json = json::array(); + + // Find the last user message index to attach images/audio to + int last_user_msg_idx = -1; + for (int i = request->messages_size() - 1; i >= 0; i--) { + if (request->messages(i).role() == "user") { + last_user_msg_idx = i; + break; + } + } + for (int i = 0; i < request->messages_size(); i++) { const auto& msg = request->messages(i); json msg_json; msg_json["role"] = msg.role(); + bool is_last_user_msg = (i == last_user_msg_idx); + bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0); + // Handle content - can be string, null, or array // For multimodal content, we'll embed images/audio from separate fields if (!msg.content().empty()) { - msg_json["content"] = msg.content(); - } else if (request->images_size() > 0 || request->audios_size() > 0) { - // If no content but has images/audio, create content array + // Try to parse content as JSON to see if it's already an array + json content_val; + try { + content_val = json::parse(msg.content()); + } catch (const json::parse_error&) { + // Not JSON, treat as plain string + content_val = msg.content(); + } + + // If content is a string and this is the last user message with images/audio, combine them + if (content_val.is_string() && is_last_user_msg && has_images_or_audio) { + json content_array = json::array(); + // Add text first + content_array.push_back({{"type", "text"}, {"text", content_val.get()}}); + // Add images + if (request->images_size() > 0) { + for (int j = 0; j < request->images_size(); j++) { + json image_chunk; + image_chunk["type"] = "image_url"; + json image_url; + image_url["url"] = "data:image/jpeg;base64," + request->images(j); + image_chunk["image_url"] = image_url; + content_array.push_back(image_chunk); + } + } + // Add audios + if (request->audios_size() > 0) { + for (int j = 0; j < request->audios_size(); j++) { + json audio_chunk; + audio_chunk["type"] = "input_audio"; + json input_audio; + input_audio["data"] = request->audios(j); + input_audio["format"] = "wav"; // default, could be made configurable + audio_chunk["input_audio"] = input_audio; + content_array.push_back(audio_chunk); + } + } + msg_json["content"] = content_array; + } else { + // Use content as-is (already array or not last user message) + msg_json["content"] = content_val; + } + } else if (is_last_user_msg && has_images_or_audio) { + // If no content but this is the last user message with images/audio, create content array json content_array = json::array(); if (request->images_size() > 0) { for (int j = 0; j < request->images_size(); j++) { @@ -1048,6 +1159,9 @@ class BackendServiceImpl final : public backend::Backend::Service { // Create parser options with current chat_templates to ensure tmpls is not null oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt; parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates + // Update allow_image and allow_audio based on current mctx state + parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false; + parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false; json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files); // Extract the prompt from parsed data @@ -1088,7 +1202,7 @@ class BackendServiceImpl final : public backend::Backend::Service { // If not using chat templates, extract files from image_data/audio_data fields // (If using chat templates, files were already extracted by oaicompat_chat_params_parse) - // if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) { + if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) { const auto &images_data = data.find("image_data"); if (images_data != data.end() && images_data->is_array()) { @@ -1110,7 +1224,7 @@ class BackendServiceImpl final : public backend::Backend::Service { files.push_back(decoded_data); } } - // } + } // process files const bool has_mtmd = ctx_server.mctx != nullptr;