From c51124c710e1aa134d0ff10afad6511559d73022 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 11 Nov 2025 18:48:33 +0100
Subject: [PATCH 01/16] feat: initial hook to install elements directly
Signed-off-by: Ettore Di Giacinto
---
core/http/endpoints/localai/backend.go | 4 +-
core/http/endpoints/localai/gallery.go | 4 +-
core/http/endpoints/localai/import_model.go | 43 +++++++++++++++++++++
core/http/routes/localai.go | 3 ++
core/http/routes/ui_api.go | 8 ++--
core/p2p/sync.go | 2 +-
core/schema/localai.go | 6 +++
core/services/backends.go | 2 +-
core/services/gallery.go | 8 ++--
core/services/models.go | 35 ++++++++++-------
core/services/operation.go | 8 +++-
11 files changed, 92 insertions(+), 31 deletions(-)
diff --git a/core/http/endpoints/localai/backend.go b/core/http/endpoints/localai/backend.go
index f4d88ffec154..e2f5f5635a21 100644
--- a/core/http/endpoints/localai/backend.go
+++ b/core/http/endpoints/localai/backend.go
@@ -76,7 +76,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) err
if err != nil {
return err
}
- mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
+ mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
ID: uuid.String(),
GalleryElementName: input.ID,
Galleries: mgs.galleries,
@@ -95,7 +95,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() func(c *fiber.Ctx) er
return func(c *fiber.Ctx) error {
backendName := c.Params("name")
- mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
+ mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
Delete: true,
GalleryElementName: backendName,
Galleries: mgs.galleries,
diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go
index 8948fde34250..d8079bcc5128 100644
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -77,7 +77,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
if err != nil {
return err
}
- mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+ mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: input.GalleryModel,
ID: uuid.String(),
GalleryElementName: input.ID,
@@ -98,7 +98,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
return func(c *fiber.Ctx) error {
modelName := c.Params("name")
- mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+ mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Delete: true,
GalleryElementName: modelName,
}
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index c6ff6bb962ea..e579642ed10e 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -2,16 +2,59 @@ package localai
import (
"encoding/json"
+ "fmt"
"os"
"path/filepath"
"strings"
"github.com/gofiber/fiber/v2"
+ "github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
+ "github.com/mudler/LocalAI/core/gallery"
+ httpUtils "github.com/mudler/LocalAI/core/http/utils"
+ "github.com/mudler/LocalAI/core/schema"
+ "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/utils"
+
"gopkg.in/yaml.v3"
)
+// ImportModelURIEndpoint handles creating new model configurations from a URI
+func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler {
+ return func(c *fiber.Ctx) error {
+
+ input := new(schema.ImportModelRequest)
+
+ if err := c.BodyParser(input); err != nil {
+ return err
+ }
+
+ // From the input, we extract default settings for the model and hand them to the gallery service
+ // TODO: This is now all fake data
+ modelConfig := gallery.ModelConfig{
+ Name: "imported-model-name",
+ }
+
+ uuid, err := uuid.NewUUID()
+ if err != nil {
+ return err
+ }
+ galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
+ Req: gallery.GalleryModel{
+ Overrides: map[string]interface{}{},
+ },
+ ID: uuid.String(),
+ GalleryElement: &modelConfig,
+ BackendGalleries: appConfig.BackendGalleries,
+ }
+
+ return c.JSON(schema.GalleryResponse{
+ ID: uuid.String(),
+ StatusURL: fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), uuid.String()),
+ })
+ }
+}
+
// ImportModelEndpoint handles creating new model configurations
func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) fiber.Handler {
return func(c *fiber.Ctx) error {
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 298f5fd787b3..119a02b55311 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -57,6 +57,9 @@ func RegisterLocalAIRoutes(router *fiber.App,
// Custom model import endpoint
router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig))
+ // URI model import endpoint
+ router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService))
+
// Custom model edit endpoint
router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig))
diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go
index 1fb016825c2b..d60809512d38 100644
--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -247,7 +247,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
uid := id.String()
opcache.Set(galleryID, uid)
- op := services.GalleryOp[gallery.GalleryModel]{
+ op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uid,
GalleryElementName: galleryID,
Galleries: appConfig.Galleries,
@@ -290,7 +290,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
opcache.Set(galleryID, uid)
- op := services.GalleryOp[gallery.GalleryModel]{
+ op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uid,
Delete: true,
GalleryElementName: galleryName,
@@ -525,7 +525,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
uid := id.String()
opcache.Set(backendID, uid)
- op := services.GalleryOp[gallery.GalleryBackend]{
+ op := services.GalleryOp[gallery.GalleryBackend, any]{
ID: uid,
GalleryElementName: backendID,
Galleries: appConfig.BackendGalleries,
@@ -567,7 +567,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
opcache.Set(backendID, uid)
- op := services.GalleryOp[gallery.GalleryBackend]{
+ op := services.GalleryOp[gallery.GalleryBackend, any]{
ID: uid,
Delete: true,
GalleryElementName: backendName,
diff --git a/core/p2p/sync.go b/core/p2p/sync.go
index f9be422a6bf5..44efe93d8224 100644
--- a/core/p2p/sync.go
+++ b/core/p2p/sync.go
@@ -73,7 +73,7 @@ func syncState(ctx context.Context, n *node.Node, app *application.Application)
continue
}
- app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+ app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
ID: uuid.String(),
GalleryElementName: model,
Galleries: app.ApplicationConfig().Galleries,
diff --git a/core/schema/localai.go b/core/schema/localai.go
index 0f9460cf4ae1..5eb56d91bf5d 100644
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -1,6 +1,7 @@
package schema
import (
+ "encoding/json"
"time"
gopsutil "github.com/shirou/gopsutil/v3/process"
@@ -157,3 +158,8 @@ type Detection struct {
Height float32 `json:"height"`
ClassName string `json:"class_name"`
}
+
+type ImportModelRequest struct {
+ URI string `json:"uri"`
+ Preferences json.RawMessage `json:"preferences,omitempty"`
+}
diff --git a/core/services/backends.go b/core/services/backends.go
index 7295734dfcbc..7ffca2ac1030 100644
--- a/core/services/backends.go
+++ b/core/services/backends.go
@@ -8,7 +8,7 @@ import (
"github.com/rs/zerolog/log"
)
-func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error {
+func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend, any], systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
diff --git a/core/services/gallery.go b/core/services/gallery.go
index 4833aa334ba2..2290c450d9ed 100644
--- a/core/services/gallery.go
+++ b/core/services/gallery.go
@@ -14,8 +14,8 @@ import (
type GalleryService struct {
appConfig *config.ApplicationConfig
sync.Mutex
- ModelGalleryChannel chan GalleryOp[gallery.GalleryModel]
- BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend]
+ ModelGalleryChannel chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]
+ BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend, any]
modelLoader *model.ModelLoader
statuses map[string]*GalleryOpStatus
@@ -24,8 +24,8 @@ type GalleryService struct {
func NewGalleryService(appConfig *config.ApplicationConfig, ml *model.ModelLoader) *GalleryService {
return &GalleryService{
appConfig: appConfig,
- ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel]),
- BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend]),
+ ModelGalleryChannel: make(chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]),
+ BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend, any]),
modelLoader: ml,
statuses: make(map[string]*GalleryOpStatus),
}
diff --git a/core/services/models.go b/core/services/models.go
index 43696f455a09..b22999f6b977 100644
--- a/core/services/models.go
+++ b/core/services/models.go
@@ -9,10 +9,11 @@ import (
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
"github.com/mudler/LocalAI/pkg/utils"
+ "github.com/rs/zerolog/log"
"gopkg.in/yaml.v2"
)
-func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
+func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
utils.ResetDownloadTimers()
g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
@@ -118,28 +119,32 @@ func ApplyGalleryFromString(systemState *system.SystemState, modelLoader *model.
// processModelOperation handles the installation or deletion of a model
func processModelOperation(
- op *GalleryOp[gallery.GalleryModel],
+ op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig],
systemState *system.SystemState,
modelLoader *model.ModelLoader,
enforcePredownloadScans bool,
automaticallyInstallBackend bool,
progressCallback func(string, string, string, float64),
) error {
- // delete a model
- if op.Delete {
+ switch {
+ case op.Delete:
return gallery.DeleteModelFromSystem(systemState, op.GalleryElementName)
- }
-
- // if the request contains a gallery name, we apply the gallery from the gallery list
- if op.GalleryElementName != "" {
+ case op.GalleryElement != nil:
+ installedModel, err := gallery.InstallModel(
+ systemState, op.GalleryElement.Name,
+ op.GalleryElement,
+ op.Req.Overrides,
+ progressCallback, enforcePredownloadScans)
+ if automaticallyInstallBackend && installedModel.Backend != "" {
+ log.Debug().Msgf("Installing backend %q", installedModel.Backend)
+ if err := gallery.InstallBackendFromGallery(op.BackendGalleries, systemState, modelLoader, installedModel.Backend, progressCallback, false); err != nil {
+ return err
+ }
+ }
+ return err
+ case op.GalleryElementName != "":
return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, systemState, modelLoader, op.GalleryElementName, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend)
- // } else if op.ConfigURL != "" {
- // err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL)
- // if err != nil {
- // return err
- // }
- // return cl.Preload(modelPath)
- } else {
+ default:
return installModelFromRemoteConfig(systemState, modelLoader, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries)
}
}
diff --git a/core/services/operation.go b/core/services/operation.go
index 962921807f9d..d0ba76ceb1a9 100644
--- a/core/services/operation.go
+++ b/core/services/operation.go
@@ -5,12 +5,16 @@ import (
"github.com/mudler/LocalAI/pkg/xsync"
)
-type GalleryOp[T any] struct {
+type GalleryOp[T any, E any] struct {
ID string
GalleryElementName string
Delete bool
- Req T
+ Req T
+
+ // If specified, the gallery element is installed directly, without resolving GalleryElementName
+ GalleryElement *E
+
Galleries []config.Gallery
BackendGalleries []config.Gallery
}
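The second type parameter E is what enables the direct-install hook: an op can now carry either a gallery element name to resolve, or a concrete element to install as-is. A minimal sketch of the two call styles against the fields above (svc, opID, appConfig and modelConfig stand in for caller-side values and are not part of this patch):

    // Existing flow: resolve the element by name from the configured galleries.
    svc.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
        ID:                 opID,
        GalleryElementName: "phi-2",
        Galleries:          appConfig.Galleries,
        BackendGalleries:   appConfig.BackendGalleries,
    }

    // New flow: hand over a concrete *gallery.ModelConfig and skip the lookup.
    svc.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
        ID:               opID,
        Req:              gallery.GalleryModel{Overrides: map[string]interface{}{}},
        GalleryElement:   &modelConfig,
        BackendGalleries: appConfig.BackendGalleries,
    }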
From 1c5d78d3dadcee40e291179dfd53516c3cbf3772 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 11 Nov 2025 19:04:28 +0100
Subject: [PATCH 02/16] WIP: UI changes
Signed-off-by: Ettore Di Giacinto
---
core/http/views/model-editor.html | 669 +++++++++++++++--------
core/http/views/partials/inprogress.html | 9 +
2 files changed, 465 insertions(+), 213 deletions(-)
diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index d5cbc550b7c4..f8db7cf66fae 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -3,9 +3,10 @@
 {{template "views/partials/head" .}}
 {{template "views/partials/navbar" .}}
+ {{template "views/partials/inprogress" .}}
@@ -24,19 +25,44 @@
 {{if .ModelName}}Edit Model: {{.ModelName}}{{else}}Import New Model{{end}}
 Configure your model settings using YAML
+ [markup stripped in extraction]
@@ -45,8 +71,88 @@
+ [markup stripped in extraction]
+ Import from URI
+ Enter the URI or path to the model file you want to import
+ [markup stripped in extraction]
+ Add key-value pairs to customize the import process
+ [markup stripped in extraction]
@@ -144,22 +250,22 @@
}
/* Enhanced YAML Syntax Highlighting */
-.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for YAML keys */
-.cm-string { color: #10b981 !important; } /* Emerald for strings */
-.cm-number { color: #f59e0b !important; } /* Amber for numbers */
-.cm-comment { color: #6b7280 !important; font-style: italic !important; } /* Gray for comments */
-.cm-property { color: #ec4899 !important; } /* Pink for properties */
-.cm-operator { color: #ef4444 !important; } /* Red for operators */
-.cm-variable { color: #06b6d4 !important; } /* Cyan for variables */
-.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for tags */
-.cm-attribute { color: #f59e0b !important; } /* Amber for attributes */
-.cm-def { color: #ec4899 !important; font-weight: 600 !important; } /* Pink for definitions */
-.cm-bracket { color: #d1d5db !important; } /* Light gray for brackets */
-.cm-punctuation { color: #d1d5db !important; } /* Light gray for punctuation */
-.cm-quote { color: #10b981 !important; } /* Emerald for quotes */
-.cm-meta { color: #6b7280 !important; } /* Gray for meta */
-.cm-builtin { color: #f472b6 !important; } /* Pink for builtins */
-.cm-atom { color: #f59e0b !important; } /* Amber for atoms like true/false/null */
+.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; }
+.cm-string { color: #10b981 !important; }
+.cm-number { color: #f59e0b !important; }
+.cm-comment { color: #6b7280 !important; font-style: italic !important; }
+.cm-property { color: #ec4899 !important; }
+.cm-operator { color: #ef4444 !important; }
+.cm-variable { color: #06b6d4 !important; }
+.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; }
+.cm-attribute { color: #f59e0b !important; }
+.cm-def { color: #ec4899 !important; font-weight: 600 !important; }
+.cm-bracket { color: #d1d5db !important; }
+.cm-punctuation { color: #d1d5db !important; }
+.cm-quote { color: #10b981 !important; }
+.cm-meta { color: #6b7280 !important; }
+.cm-builtin { color: #f472b6 !important; }
+.cm-atom { color: #f59e0b !important; }
/* Enhanced scrollbar styling */
.CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler {
@@ -242,22 +348,195 @@
+ [hunk body stripped in extraction]
diff --git a/core/http/views/partials/inprogress.html b/core/http/views/partials/inprogress.html
index 318406cb0881..7ebe04927d1b 100644
--- a/core/http/views/partials/inprogress.html
+++ b/core/http/views/partials/inprogress.html
@@ -120,8 +120,17 @@
throw new Error('Failed to fetch operations');
}
const data = await response.json();
+ const previousCount = this.operations.length;
this.operations = data.operations || [];
+ // If we had operations before and now we don't, refresh the page
+ if (previousCount > 0 && this.operations.length === 0) {
+ // Small delay to ensure the user sees the completion
+ setTimeout(() => {
+ window.location.reload();
+ }, 1000);
+ }
+
// Auto-collapse if there are many operations
if (this.operations.length > 5 && !this.collapsed) {
// Don't auto-collapse, let user control it
From 1ad42a08d7425e5d662afe820ae6cee3045763d5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 11 Nov 2025 21:28:23 +0100
Subject: [PATCH 03/16] Move HF API client to pkg
Signed-off-by: Ettore Di Giacinto
---
.github/gallery-agent/agent.go | 4 +-
.github/gallery-agent/go.mod | 38 ----
.github/gallery-agent/go.sum | 168 ------------------
.github/gallery-agent/main.go | 2 +-
.github/gallery-agent/tools.go | 8 +-
.github/workflows/gallery-agent.yaml | 9 +-
go.mod | 2 +-
.../hfapi => pkg/huggingface-api}/client.go | 0
.../huggingface-api}/client_test.go | 2 +-
.../huggingface-api}/hfapi_suite_test.go | 2 +
10 files changed, 12 insertions(+), 223 deletions(-)
delete mode 100644 .github/gallery-agent/go.mod
delete mode 100644 .github/gallery-agent/go.sum
rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/client.go (100%)
rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/client_test.go (99%)
rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/hfapi_suite_test.go (99%)
diff --git a/.github/gallery-agent/agent.go b/.github/gallery-agent/agent.go
index 19262669f574..687b5a1eb77f 100644
--- a/.github/gallery-agent/agent.go
+++ b/.github/gallery-agent/agent.go
@@ -7,8 +7,8 @@ import (
"slices"
"strings"
- "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
- "github.com/mudler/cogito"
+ hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+ cogito "github.com/mudler/cogito"
"github.com/mudler/cogito/structures"
"github.com/sashabaranov/go-openai/jsonschema"
diff --git a/.github/gallery-agent/go.mod b/.github/gallery-agent/go.mod
deleted file mode 100644
index 7129f5507bb4..000000000000
--- a/.github/gallery-agent/go.mod
+++ /dev/null
@@ -1,38 +0,0 @@
-module github.com/go-skynet/LocalAI/.github/gallery-agent
-
-go 1.24.1
-
-require (
- github.com/mudler/cogito v0.3.0
- github.com/onsi/ginkgo/v2 v2.25.3
- github.com/onsi/gomega v1.38.2
- github.com/sashabaranov/go-openai v1.41.2
- github.com/tmc/langchaingo v0.1.13
-)
-
-require (
- dario.cat/mergo v1.0.1 // indirect
- github.com/Masterminds/goutils v1.1.1 // indirect
- github.com/Masterminds/semver/v3 v3.4.0 // indirect
- github.com/Masterminds/sprig/v3 v3.3.0 // indirect
- github.com/go-logr/logr v1.4.3 // indirect
- github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
- github.com/google/go-cmp v0.7.0 // indirect
- github.com/google/jsonschema-go v0.3.0 // indirect
- github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
- github.com/google/uuid v1.6.0 // indirect
- github.com/huandu/xstrings v1.5.0 // indirect
- github.com/mitchellh/copystructure v1.2.0 // indirect
- github.com/mitchellh/reflectwalk v1.0.2 // indirect
- github.com/modelcontextprotocol/go-sdk v1.0.0 // indirect
- github.com/shopspring/decimal v1.4.0 // indirect
- github.com/spf13/cast v1.7.0 // indirect
- github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
- go.uber.org/automaxprocs v1.6.0 // indirect
- go.yaml.in/yaml/v3 v3.0.4 // indirect
- golang.org/x/crypto v0.41.0 // indirect
- golang.org/x/net v0.43.0 // indirect
- golang.org/x/sys v0.35.0 // indirect
- golang.org/x/text v0.28.0 // indirect
- golang.org/x/tools v0.36.0 // indirect
-)
diff --git a/.github/gallery-agent/go.sum b/.github/gallery-agent/go.sum
deleted file mode 100644
index a5488f5b60a5..000000000000
--- a/.github/gallery-agent/go.sum
+++ /dev/null
@@ -1,168 +0,0 @@
-dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
-dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
-github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
-github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
-github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
-github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
-github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
-github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
-github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
-github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
-github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
-github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
-github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
-github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
-github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
-github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
-github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
-github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
-github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
-github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
-github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
-github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
-github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
-github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
-github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
-github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
-github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw=
-github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
-github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
-github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
-github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
-github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
-github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw=
-github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
-github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
-github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
-github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
-github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
-github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
-github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
-github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
-github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
-github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
-github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
-github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
-github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
-github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
-github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
-github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
-github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/jsonschema-go v0.3.0 h1:6AH2TxVNtk3IlvkkhjrtbUc4S8AvO0Xii0DxIygDg+Q=
-github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
-github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
-github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
-github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
-github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
-github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
-github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
-github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
-github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
-github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
-github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
-github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
-github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
-github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
-github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
-github.com/moby/go-archive v0.1.0 h1:Kk/5rdW/g+H8NHdJW2gsXyZ7UnzvJNOy6VKJqueWdcQ=
-github.com/moby/go-archive v0.1.0/go.mod h1:G9B+YoujNohJmrIYFBpSd54GTUB4lt9S+xVQvsJyFuo=
-github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
-github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
-github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
-github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
-github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
-github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
-github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
-github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
-github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
-github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
-github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74=
-github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs=
-github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
-github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
-github.com/mudler/cogito v0.3.0 h1:NbVAO3bLkK5oGSY0xq87jlz8C9OIsLW55s+8Hfzeu9s=
-github.com/mudler/cogito v0.3.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU=
-github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw=
-github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE=
-github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
-github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
-github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
-github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
-github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
-github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
-github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
-github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
-github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
-github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
-github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
-github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
-github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc=
-github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
-github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
-github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
-github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
-github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
-github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
-github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
-github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
-github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
-github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
-github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
-github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
-github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
-github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
-github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
-github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
-github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
-go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
-go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc=
-go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
-go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
-go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
-go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
-go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
-go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
-go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
-go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
-go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
-go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
-golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
-golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
-golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
-golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
-golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
-golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
-golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
-golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
-golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
-google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
-google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/.github/gallery-agent/main.go b/.github/gallery-agent/main.go
index 78d3a6cd7612..40412f198d4f 100644
--- a/.github/gallery-agent/main.go
+++ b/.github/gallery-agent/main.go
@@ -9,7 +9,7 @@ import (
"strings"
"time"
- "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
+ hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
// ProcessedModelFile represents a processed model file with additional metadata
diff --git a/.github/gallery-agent/tools.go b/.github/gallery-agent/tools.go
index b44c327764f9..3e2fc2f3a17c 100644
--- a/.github/gallery-agent/tools.go
+++ b/.github/gallery-agent/tools.go
@@ -3,9 +3,9 @@ package main
import (
"fmt"
- "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
- "github.com/sashabaranov/go-openai"
- "github.com/tmc/langchaingo/jsonschema"
+ hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+ openai "github.com/sashabaranov/go-openai"
+ jsonschema "github.com/sashabaranov/go-openai/jsonschema"
)
// Get repository README from HF
@@ -13,7 +13,7 @@ type HFReadmeTool struct {
client *hfapi.Client
}
-func (s *HFReadmeTool) Run(args map[string]any) (string, error) {
+func (s *HFReadmeTool) Execute(args map[string]any) (string, error) {
q, ok := args["repository"].(string)
if !ok {
return "", fmt.Errorf("no query")
diff --git a/.github/workflows/gallery-agent.yaml b/.github/workflows/gallery-agent.yaml
index e796d2f2cba3..4c4074bbf215 100644
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -39,11 +39,6 @@ jobs:
with:
go-version: '1.21'
- - name: Build gallery agent
- run: |
- cd .github/gallery-agent
- go mod download
- go build -o gallery-agent .
- name: Run gallery agent
env:
@@ -56,9 +51,7 @@ jobs:
MAX_MODELS: ${{ github.event.inputs.max_models || '1' }}
run: |
export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml
- cd .github/gallery-agent
- ./gallery-agent
- rm -rf gallery-agent
+ go run .github/gallery-agent
- name: Check for changes
id: check_changes
diff --git a/go.mod b/go.mod
index f520ff4edfed..464ec1553086 100644
--- a/go.mod
+++ b/go.mod
@@ -59,6 +59,7 @@ require (
go.opentelemetry.io/otel/metric v1.38.0
go.opentelemetry.io/otel/sdk/metric v1.38.0
google.golang.org/grpc v1.76.0
+ google.golang.org/protobuf v1.36.8
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1
oras.land/oras-go/v2 v2.6.0
@@ -148,7 +149,6 @@ require (
golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 // indirect
golang.org/x/time v0.12.0 // indirect
- google.golang.org/protobuf v1.36.8 // indirect
)
require (
diff --git a/.github/gallery-agent/hfapi/client.go b/pkg/huggingface-api/client.go
similarity index 100%
rename from .github/gallery-agent/hfapi/client.go
rename to pkg/huggingface-api/client.go
diff --git a/.github/gallery-agent/hfapi/client_test.go b/pkg/huggingface-api/client_test.go
similarity index 99%
rename from .github/gallery-agent/hfapi/client_test.go
rename to pkg/huggingface-api/client_test.go
index c490d0e9deb3..37a381416125 100644
--- a/.github/gallery-agent/hfapi/client_test.go
+++ b/pkg/huggingface-api/client_test.go
@@ -8,7 +8,7 @@ import (
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
- "github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
+ hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
var _ = Describe("HuggingFace API Client", func() {
diff --git a/.github/gallery-agent/hfapi/hfapi_suite_test.go b/pkg/huggingface-api/hfapi_suite_test.go
similarity index 99%
rename from .github/gallery-agent/hfapi/hfapi_suite_test.go
rename to pkg/huggingface-api/hfapi_suite_test.go
index 6dca0c029cbe..ec4654c5a9a7 100644
--- a/.github/gallery-agent/hfapi/hfapi_suite_test.go
+++ b/pkg/huggingface-api/hfapi_suite_test.go
@@ -11,3 +11,5 @@ func TestHfapi(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "HuggingFace API Suite")
}
+
+
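With the client under pkg/, both the gallery agent and the core import path introduced later in this series share one HuggingFace API package. A minimal usage sketch of the renamed tool method (HFReadmeTool and its "repository" argument appear in tools.go above; how the *hfapi.Client is constructed is not shown in this diff, so the client value is assumed):

    // Assumed: an already-constructed *hfapi.Client (constructor not shown in this diff).
    tool := &HFReadmeTool{client: client}

    // Run was renamed to Execute; it expects a "repository" argument.
    readme, err := tool.Execute(map[string]any{
        "repository": "TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF",
    })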
From cbf567a92ba7bdb92e0f0a52c2e2b40ac525e25f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 11 Nov 2025 21:54:21 +0100
Subject: [PATCH 04/16] Add simple importer for GGUF files
Signed-off-by: Ettore Di Giacinto
---
core/gallery/importers/llama-cpp.go | 92 +++++++++++++++++++++
core/http/endpoints/localai/import_model.go | 25 +++++-
2 files changed, 113 insertions(+), 4 deletions(-)
create mode 100644 core/gallery/importers/llama-cpp.go
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
new file mode 100644
index 000000000000..e91afe9972d8
--- /dev/null
+++ b/core/gallery/importers/llama-cpp.go
@@ -0,0 +1,92 @@
+package importers
+
+import (
+ "encoding/json"
+ "path/filepath"
+ "strings"
+
+ "github.com/mudler/LocalAI/core/config"
+ "github.com/mudler/LocalAI/core/gallery"
+ "github.com/mudler/LocalAI/core/schema"
+ "github.com/mudler/LocalAI/pkg/functions"
+ "go.yaml.in/yaml/v2"
+)
+
+type LlamaCPPImporter struct{}
+
+func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool {
+ preferences, err := request.Preferences.MarshalJSON()
+ if err != nil {
+ return false
+ }
+ preferencesMap := make(map[string]any)
+ err = json.Unmarshal(preferences, &preferencesMap)
+ if err != nil {
+ return false
+ }
+
+ if preferencesMap["backend"] == "llama-cpp" {
+ return true
+ }
+
+ return strings.HasSuffix(uri, ".gguf")
+}
+
+func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
+ preferences, err := request.Preferences.MarshalJSON()
+ if err != nil {
+ return gallery.ModelConfig{}, err
+ }
+ preferencesMap := make(map[string]any)
+ err = json.Unmarshal(preferences, &preferencesMap)
+ if err != nil {
+ return gallery.ModelConfig{}, err
+ }
+
+ name, ok := preferencesMap["name"].(string)
+ if !ok {
+ name = filepath.Base(uri)
+ }
+
+ description, ok := preferencesMap["description"].(string)
+ if !ok {
+ description = "Imported from " + uri
+ }
+
+ modelConfig := config.ModelConfig{
+ Name: name,
+ Description: description,
+ KnownUsecaseStrings: []string{"chat"},
+ Backend: "llama-cpp",
+ PredictionOptions: schema.PredictionOptions{
+ BasicModelRequest: schema.BasicModelRequest{
+ Model: filepath.Base(uri),
+ },
+ },
+ TemplateConfig: config.TemplateConfig{
+ UseTokenizerTemplate: true,
+ },
+ FunctionsConfig: functions.FunctionsConfig{
+ GrammarConfig: functions.GrammarConfig{
+ NoGrammar: true,
+ },
+ },
+ }
+
+ data, err := yaml.Marshal(modelConfig)
+ if err != nil {
+ return gallery.ModelConfig{}, err
+ }
+
+ return gallery.ModelConfig{
+ Name: name,
+ Description: description,
+ ConfigFile: string(data),
+ Files: []gallery.File{
+ {
+ URI: uri,
+ Filename: filepath.Base(uri),
+ },
+ },
+ }, nil
+}
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index e579642ed10e..eb6dbc80322c 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -11,6 +11,7 @@ import (
"github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
+ "github.com/mudler/LocalAI/core/gallery/importers"
httpUtils "github.com/mudler/LocalAI/core/http/utils"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
@@ -19,6 +20,15 @@ import (
"gopkg.in/yaml.v3"
)
+type Importer interface {
+ Match(uri string, request schema.ImportModelRequest) bool
+ Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
+}
+
+var defaultImporters = []Importer{
+ &importers.LlamaCPPImporter{},
+}
+
// ImportModelURIEndpoint handles creating new model configurations from a URI
func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler {
return func(c *fiber.Ctx) error {
@@ -29,10 +39,17 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
return err
}
- // From the input, we extract default settings for the model and hand them to the gallery service
- // TODO: This is now all fake data
- modelConfig := gallery.ModelConfig{
- Name: "imported-model-name",
+ var err error
+ var modelConfig gallery.ModelConfig
+
+ for _, importer := range defaultImporters {
+ if importer.Match(input.URI, *input) {
+ modelConfig, err = importer.Import(input.URI, *input)
+ if err != nil {
+ continue
+ }
+ break
+ }
}
uuid, err := uuid.NewUUID()
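Taken together, an import request now resolves like this (a sketch: the URI is the GGUF example used by the preload tests later in this series, and name and backend are the only preference keys LlamaCPPImporter reads):

    req := schema.ImportModelRequest{
        URI:         "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf",
        Preferences: json.RawMessage(`{"name": "tinyllama-chat"}`),
    }

    imp := &importers.LlamaCPPImporter{}
    if imp.Match(req.URI, req) { // true: no backend preference, but the URI ends in ".gguf"
        cfg, err := imp.Import(req.URI, req)
        // cfg.Name is "tinyllama-chat"; cfg.ConfigFile carries the marshalled
        // YAML model config and cfg.Files the single GGUF download entry.
        _, _ = cfg, err
    }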
From 0a9d6fa75d860abc59182977f9033d1f27e361fc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 11 Nov 2025 21:58:59 +0100
Subject: [PATCH 05/16] Add opcache
Signed-off-by: Ettore Di Giacinto
---
core/http/app.go | 11 ++++++++---
core/http/endpoints/localai/import_model.go | 21 +++++++++++++++++----
core/http/routes/localai.go | 5 +++--
3 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/core/http/app.go b/core/http/app.go
index dcd9a2219958..916ea9600b81 100644
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -200,11 +200,16 @@ func API(application *application.Application) (*fiber.App, error) {
requestExtractor := middleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
- routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
+
+ // Create opcache for tracking UI operations (used by both UI and LocalAI routes)
+ var opcache *services.OpCache
+ if !application.ApplicationConfig().DisableWebUI {
+ opcache = services.NewOpCache(application.GalleryService())
+ }
+
+ routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterOpenAIRoutes(router, requestExtractor, application)
if !application.ApplicationConfig().DisableWebUI {
- // Create opcache for tracking UI operations
- opcache := services.NewOpCache(application.GalleryService())
routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
}
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index eb6dbc80322c..73e47e9b549d 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -30,7 +30,7 @@ var defaultImporters = []Importer{
}
// ImportModelURIEndpoint handles creating new model configurations from a URI
-func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler {
+func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler {
return func(c *fiber.Ctx) error {
input := new(schema.ImportModelRequest)
@@ -56,13 +56,26 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
if err != nil {
return err
}
+
+ // Determine gallery ID for tracking - use model name if available, otherwise use URI
+ galleryID := input.URI
+ if modelConfig.Name != "" {
+ galleryID = modelConfig.Name
+ }
+
+ // Register operation in opcache if available (for UI progress tracking)
+ if opcache != nil {
+ opcache.Set(galleryID, uuid.String())
+ }
+
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
Req: gallery.GalleryModel{
Overrides: map[string]interface{}{},
},
- ID: uuid.String(),
- GalleryElement: &modelConfig,
- BackendGalleries: appConfig.BackendGalleries,
+ ID: uuid.String(),
+ GalleryElementName: galleryID,
+ GalleryElement: &modelConfig,
+ BackendGalleries: appConfig.BackendGalleries,
}
return c.JSON(schema.GalleryResponse{
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 119a02b55311..5d9ae821007d 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -18,7 +18,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
cl *config.ModelConfigLoader,
ml *model.ModelLoader,
appConfig *config.ApplicationConfig,
- galleryService *services.GalleryService) {
+ galleryService *services.GalleryService,
+ opcache *services.OpCache) {
router.Get("/swagger/*", swagger.HandlerDefault) // default
@@ -58,7 +59,7 @@ func RegisterLocalAIRoutes(router *fiber.App,
router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig))
// URI model import endpoint
- router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService))
+ router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache))
// Custom model edit endpoint
router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig))
From 9c0fda137442aedcb065772860c2129a68086507 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 11 Nov 2025 22:22:52 +0100
Subject: [PATCH 06/16] Wire importers to CLI
Signed-off-by: Ettore Di Giacinto
---
core/application/startup.go | 12 +++--
core/cli/models.go | 10 +++-
core/gallery/importers/importers.go | 15 ++++++
core/gallery/importers/llama-cpp.go | 2 +
core/http/endpoints/localai/import_model.go | 11 +---
core/startup/model_preload.go | 57 ++++++++++++++++++++-
core/startup/model_preload_test.go | 4 +-
7 files changed, 91 insertions(+), 20 deletions(-)
create mode 100644 core/gallery/importers/importers.go
diff --git a/core/application/startup.go b/core/application/startup.go
index a497e0e6057e..8e2387b9226f 100644
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -22,9 +22,15 @@ func New(opts ...config.AppOption) (*Application, error) {
log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
+
+ if err := application.start(); err != nil {
+ return nil, err
+ }
+
caps, err := xsysinfo.CPUCapabilities()
if err == nil {
log.Debug().Msgf("CPU capabilities: %v", caps)
+
}
gpus, err := xsysinfo.GPUs()
if err == nil {
@@ -56,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
}
}
- if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
+ if err := coreStartup.InstallModels(application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
log.Error().Err(err).Msg("error installing models")
}
@@ -152,10 +158,6 @@ func New(opts ...config.AppOption) (*Application, error) {
// Watch the configuration directory
startWatcher(options)
- if err := application.start(); err != nil {
- return nil, err
- }
-
log.Info().Msg("core/startup process completed!")
return application, nil
}
diff --git a/core/cli/models.go b/core/cli/models.go
index 2b8da49c466a..dd5273317bfc 100644
--- a/core/cli/models.go
+++ b/core/cli/models.go
@@ -1,12 +1,14 @@
package cli
import (
+ "context"
"encoding/json"
"errors"
"fmt"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
+ "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/startup"
@@ -78,6 +80,12 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
return err
}
+ galleryService := services.NewGalleryService(&config.ApplicationConfig{}, model.NewModelLoader(systemState, true))
+ err = galleryService.Start(context.Background(), config.NewModelConfigLoader(mi.ModelsPath), systemState)
+ if err != nil {
+ return err
+ }
+
var galleries []config.Gallery
if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
log.Error().Err(err).Msg("unable to load galleries")
@@ -127,7 +135,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
}
modelLoader := model.NewModelLoader(systemState, true)
- err = startup.InstallModels(galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
+ err = startup.InstallModels(galleryService, galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
if err != nil {
return err
}
diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
new file mode 100644
index 000000000000..5236af0cc245
--- /dev/null
+++ b/core/gallery/importers/importers.go
@@ -0,0 +1,15 @@
+package importers
+
+import (
+ "github.com/mudler/LocalAI/core/gallery"
+ "github.com/mudler/LocalAI/core/schema"
+)
+
+var DefaultImporters = []Importer{
+ &LlamaCPPImporter{},
+}
+
+type Importer interface {
+ Match(uri string, request schema.ImportModelRequest) bool
+ Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
+}
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index e91afe9972d8..3596c868f50a 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -12,6 +12,8 @@ import (
"go.yaml.in/yaml/v2"
)
+var _ Importer = &LlamaCPPImporter{}
+
type LlamaCPPImporter struct{}
func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool {
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index 73e47e9b549d..ea71ae53ae5d 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -20,15 +20,6 @@ import (
"gopkg.in/yaml.v3"
)
-type Importer interface {
- Match(uri string, request schema.ImportModelRequest) bool
- Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
-}
-
-var defaultImporters = []Importer{
- &importers.LlamaCPPImporter{},
-}
-
// ImportModelURIEndpoint handles creating new model configurations from a URI
func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler {
return func(c *fiber.Ctx) error {
@@ -42,7 +33,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
var err error
var modelConfig gallery.ModelConfig
- for _, importer := range defaultImporters {
+ for _, importer := range importers.DefaultImporters {
if importer.Match(input.URI, *input) {
modelConfig, err = importer.Import(input.URI, *input)
if err != nil {
diff --git a/core/startup/model_preload.go b/core/startup/model_preload.go
index a98e0592bd80..4550a25e46f3 100644
--- a/core/startup/model_preload.go
+++ b/core/startup/model_preload.go
@@ -7,9 +7,14 @@ import (
"path"
"path/filepath"
"strings"
+ "time"
+ "github.com/google/uuid"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
+ "github.com/mudler/LocalAI/core/gallery/importers"
+ "github.com/mudler/LocalAI/core/schema"
+ "github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
"github.com/mudler/LocalAI/pkg/system"
@@ -25,7 +30,7 @@ const (
// InstallModels will preload models from the given list of URLs and galleries
// It will download the model if it is not already present in the model path
// It will also try to resolve if the model is an embedded model YAML configuration
-func InstallModels(galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
+func InstallModels(galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
// create an error that groups all errors
var err error
@@ -154,7 +159,55 @@ func InstallModels(galleries, backendGalleries []config.Gallery, systemState *sy
err = errors.Join(err, e)
} else if !found {
log.Warn().Msgf("[startup] failed resolving model '%s'", url)
- err = errors.Join(err, fmt.Errorf("failed resolving model '%s'", url))
+
+ if galleryService == nil {
+ err = errors.Join(err, fmt.Errorf("cannot start autoimporter: no gallery service available to handle URI %q", url))
+ continue
+ }
+
+ // Fall back to the importers to derive a model config from the URI
+ var err error
+ var modelConfig gallery.ModelConfig
+ for _, importer := range importers.DefaultImporters {
+ if importer.Match(url, schema.ImportModelRequest{}) {
+ modelConfig, err = importer.Import(url, schema.ImportModelRequest{})
+ if err != nil {
+ continue
+ }
+ break
+ }
+ }
+
+ uuid, err := uuid.NewUUID()
+ if err != nil {
+ return err
+ }
+
+ galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
+ Req: gallery.GalleryModel{
+ Overrides: map[string]interface{}{},
+ },
+ ID: uuid.String(),
+ GalleryElementName: modelConfig.Name,
+ GalleryElement: &modelConfig,
+ BackendGalleries: backendGalleries,
+ }
+
+ var status *services.GalleryOpStatus
+ // wait for op to finish
+ for {
+ status = galleryService.GetStatus(uuid.String())
+ if status != nil && status.Processed {
+ break
+ }
+ time.Sleep(1 * time.Second)
+ }
+
+ if status.Error != nil {
+ return status.Error
+ }
+
+ log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
}
}
}
diff --git a/core/startup/model_preload_test.go b/core/startup/model_preload_test.go
index 16096f41befa..255c3dc4e65a 100644
--- a/core/startup/model_preload_test.go
+++ b/core/startup/model_preload_test.go
@@ -33,7 +33,7 @@ var _ = Describe("Preload test", func() {
url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
fileName := fmt.Sprintf("%s.yaml", "phi-2")
- InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
+ InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
resultFile := filepath.Join(tmpdir, fileName)
@@ -46,7 +46,7 @@ var _ = Describe("Preload test", func() {
url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
- err := InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
+ err := InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
Expect(err).ToNot(HaveOccurred())
resultFile := filepath.Join(tmpdir, fileName)
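Note on the autoimport flow introduced above: the model_preload.go hunk enqueues a GalleryOp on ModelGalleryChannel and then polls GetStatus once per second until the operation is marked Processed. Below is a minimal standalone sketch of that pattern; the names match the diff, but the deadline guard is an addition of this sketch, not part of the patch, shown only to illustrate how a caller could avoid blocking forever.

package startup

import (
	"fmt"
	"time"

	"github.com/mudler/LocalAI/core/gallery"
	"github.com/mudler/LocalAI/core/services"
)

// waitForGalleryOp enqueues op and polls until the gallery service reports it processed.
// The deadline is hypothetical; the patch itself polls without a timeout.
func waitForGalleryOp(gs *services.GalleryService, op services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig], deadline time.Duration) error {
	gs.ModelGalleryChannel <- op
	expire := time.Now().Add(deadline)
	for {
		if status := gs.GetStatus(op.ID); status != nil && status.Processed {
			return status.Error // nil on success
		}
		if time.Now().After(expire) {
			return fmt.Errorf("gallery operation %s timed out", op.ID)
		}
		time.Sleep(1 * time.Second)
	}
}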
From af6070ac602235dc371d8ca95a216623fa8725cf Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 11 Nov 2025 22:30:39 +0100
Subject: [PATCH 07/16] Add omitempty to config fields
Signed-off-by: Ettore Di Giacinto
---
core/config/model_config.go | 292 ++++++++++++++++++------------------
core/schema/prediction.go | 49 +++---
core/schema/request.go | 3 +-
pkg/functions/parse.go | 59 ++++----
4 files changed, 208 insertions(+), 195 deletions(-)
diff --git a/core/config/model_config.go b/core/config/model_config.go
index 1dee82363d2f..41664f2a3dd4 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -16,30 +16,31 @@ const (
RAND_SEED = -1
)
+// @Description TTS configuration
type TTSConfig struct {
// Voice wav path or id
- Voice string `yaml:"voice" json:"voice"`
+ Voice string `yaml:"voice,omitempty" json:"voice,omitempty"`
- AudioPath string `yaml:"audio_path" json:"audio_path"`
+ AudioPath string `yaml:"audio_path,omitempty" json:"audio_path,omitempty"`
}
-// ModelConfig represents a model configuration
+// @Description ModelConfig represents a model configuration
type ModelConfig struct {
modelConfigFile string `yaml:"-" json:"-"`
- schema.PredictionOptions `yaml:"parameters" json:"parameters"`
- Name string `yaml:"name" json:"name"`
-
- F16 *bool `yaml:"f16" json:"f16"`
- Threads *int `yaml:"threads" json:"threads"`
- Debug *bool `yaml:"debug" json:"debug"`
- Roles map[string]string `yaml:"roles" json:"roles"`
- Embeddings *bool `yaml:"embeddings" json:"embeddings"`
- Backend string `yaml:"backend" json:"backend"`
- TemplateConfig TemplateConfig `yaml:"template" json:"template"`
- KnownUsecaseStrings []string `yaml:"known_usecases" json:"known_usecases"`
+ schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"`
+ Name string `yaml:"name,omitempty" json:"name,omitempty"`
+
+ F16 *bool `yaml:"f16,omitempty" json:"f16,omitempty"`
+ Threads *int `yaml:"threads,omitempty" json:"threads,omitempty"`
+ Debug *bool `yaml:"debug,omitempty" json:"debug,omitempty"`
+ Roles map[string]string `yaml:"roles,omitempty" json:"roles,omitempty"`
+ Embeddings *bool `yaml:"embeddings,omitempty" json:"embeddings,omitempty"`
+ Backend string `yaml:"backend,omitempty" json:"backend,omitempty"`
+ TemplateConfig TemplateConfig `yaml:"template,omitempty" json:"template,omitempty"`
+ KnownUsecaseStrings []string `yaml:"known_usecases,omitempty" json:"known_usecases,omitempty"`
KnownUsecases *ModelConfigUsecases `yaml:"-" json:"-"`
- Pipeline Pipeline `yaml:"pipeline" json:"pipeline"`
+ Pipeline Pipeline `yaml:"pipeline,omitempty" json:"pipeline,omitempty"`
PromptStrings, InputStrings []string `yaml:"-" json:"-"`
InputToken [][]int `yaml:"-" json:"-"`
@@ -47,50 +48,52 @@ type ModelConfig struct {
ResponseFormat string `yaml:"-" json:"-"`
ResponseFormatMap map[string]interface{} `yaml:"-" json:"-"`
- FunctionsConfig functions.FunctionsConfig `yaml:"function" json:"function"`
+ FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
- FeatureFlag FeatureFlag `yaml:"feature_flags" json:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
+ FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline" json:",inline"`
// Diffusers
- Diffusers Diffusers `yaml:"diffusers" json:"diffusers"`
- Step int `yaml:"step" json:"step"`
+ Diffusers Diffusers `yaml:"diffusers,omitempty" json:"diffusers,omitempty"`
+ Step int `yaml:"step,omitempty" json:"step,omitempty"`
// GRPC Options
- GRPC GRPC `yaml:"grpc" json:"grpc"`
+ GRPC GRPC `yaml:"grpc,omitempty" json:"grpc,omitempty"`
// TTS specifics
- TTSConfig `yaml:"tts" json:"tts"`
+ TTSConfig `yaml:"tts,omitempty" json:"tts,omitempty"`
// CUDA
// Explicitly enable CUDA or not (some backends might need it)
- CUDA bool `yaml:"cuda" json:"cuda"`
+ CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`
- DownloadFiles []File `yaml:"download_files" json:"download_files"`
+ DownloadFiles []File `yaml:"download_files,omitempty" json:"download_files,omitempty"`
- Description string `yaml:"description" json:"description"`
- Usage string `yaml:"usage" json:"usage"`
+ Description string `yaml:"description,omitempty" json:"description,omitempty"`
+ Usage string `yaml:"usage,omitempty" json:"usage,omitempty"`
- Options []string `yaml:"options" json:"options"`
- Overrides []string `yaml:"overrides" json:"overrides"`
+ Options []string `yaml:"options,omitempty" json:"options,omitempty"`
+ Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`
- MCP MCPConfig `yaml:"mcp" json:"mcp"`
- Agent AgentConfig `yaml:"agent" json:"agent"`
+ MCP MCPConfig `yaml:"mcp,omitempty" json:"mcp,omitempty"`
+ Agent AgentConfig `yaml:"agent,omitempty" json:"agent,omitempty"`
}
+// @Description MCP configuration
type MCPConfig struct {
- Servers string `yaml:"remote" json:"remote"`
- Stdio string `yaml:"stdio" json:"stdio"`
+ Servers string `yaml:"remote,omitempty" json:"remote,omitempty"`
+ Stdio string `yaml:"stdio,omitempty" json:"stdio,omitempty"`
}
+// @Description Agent configuration
type AgentConfig struct {
- MaxAttempts int `yaml:"max_attempts" json:"max_attempts"`
- MaxIterations int `yaml:"max_iterations" json:"max_iterations"`
- EnableReasoning bool `yaml:"enable_reasoning" json:"enable_reasoning"`
- EnablePlanning bool `yaml:"enable_planning" json:"enable_planning"`
- EnableMCPPrompts bool `yaml:"enable_mcp_prompts" json:"enable_mcp_prompts"`
- EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator" json:"enable_plan_re_evaluator"`
+ MaxAttempts int `yaml:"max_attempts,omitempty" json:"max_attempts,omitempty"`
+ MaxIterations int `yaml:"max_iterations,omitempty" json:"max_iterations,omitempty"`
+ EnableReasoning bool `yaml:"enable_reasoning,omitempty" json:"enable_reasoning,omitempty"`
+ EnablePlanning bool `yaml:"enable_planning,omitempty" json:"enable_planning,omitempty"`
+ EnableMCPPrompts bool `yaml:"enable_mcp_prompts,omitempty" json:"enable_mcp_prompts,omitempty"`
+ EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator,omitempty" json:"enable_plan_re_evaluator,omitempty"`
}
func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers], error) {
@@ -107,35 +110,39 @@ func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCP
return remote, stdio, nil
}
+// @Description MCP generic configuration
type MCPGenericConfig[T any] struct {
- Servers T `yaml:"mcpServers" json:"mcpServers"`
+ Servers T `yaml:"mcpServers,omitempty" json:"mcpServers,omitempty"`
}
type MCPRemoteServers map[string]MCPRemoteServer
type MCPSTDIOServers map[string]MCPSTDIOServer
+// @Description MCP remote server configuration
type MCPRemoteServer struct {
- URL string `json:"url"`
- Token string `json:"token"`
+ URL string `json:"url,omitempty"`
+ Token string `json:"token,omitempty"`
}
+// @Description MCP STDIO server configuration
type MCPSTDIOServer struct {
- Args []string `json:"args"`
- Env map[string]string `json:"env"`
- Command string `json:"command"`
+ Args []string `json:"args,omitempty"`
+ Env map[string]string `json:"env,omitempty"`
+ Command string `json:"command,omitempty"`
}
-// Pipeline defines other models to use for audio-to-audio
+// @Description Pipeline defines other models to use for audio-to-audio
type Pipeline struct {
- TTS string `yaml:"tts" json:"tts"`
- LLM string `yaml:"llm" json:"llm"`
- Transcription string `yaml:"transcription" json:"transcription"`
- VAD string `yaml:"vad" json:"vad"`
+ TTS string `yaml:"tts,omitempty" json:"tts,omitempty"`
+ LLM string `yaml:"llm,omitempty" json:"llm,omitempty"`
+ Transcription string `yaml:"transcription,omitempty" json:"transcription,omitempty"`
+ VAD string `yaml:"vad,omitempty" json:"vad,omitempty"`
}
+// @Description File configuration for model downloads
type File struct {
- Filename string `yaml:"filename" json:"filename"`
- SHA256 string `yaml:"sha256" json:"sha256"`
- URI downloader.URI `yaml:"uri" json:"uri"`
+ Filename string `yaml:"filename,omitempty" json:"filename,omitempty"`
+ SHA256 string `yaml:"sha256,omitempty" json:"sha256,omitempty"`
+ URI downloader.URI `yaml:"uri,omitempty" json:"uri,omitempty"`
}
type FeatureFlag map[string]*bool
@@ -147,124 +154,125 @@ func (ff FeatureFlag) Enabled(s string) bool {
return false
}
+// @Description GRPC configuration
type GRPC struct {
- Attempts int `yaml:"attempts" json:"attempts"`
- AttemptsSleepTime int `yaml:"attempts_sleep_time" json:"attempts_sleep_time"`
+ Attempts int `yaml:"attempts,omitempty" json:"attempts,omitempty"`
+ AttemptsSleepTime int `yaml:"attempts_sleep_time,omitempty" json:"attempts_sleep_time,omitempty"`
}
+// @Description Diffusers configuration
type Diffusers struct {
- CUDA bool `yaml:"cuda" json:"cuda"`
- PipelineType string `yaml:"pipeline_type" json:"pipeline_type"`
- SchedulerType string `yaml:"scheduler_type" json:"scheduler_type"`
- EnableParameters string `yaml:"enable_parameters" json:"enable_parameters"` // A list of comma separated parameters to specify
- IMG2IMG bool `yaml:"img2img" json:"img2img"` // Image to Image Diffuser
- ClipSkip int `yaml:"clip_skip" json:"clip_skip"` // Skip every N frames
- ClipModel string `yaml:"clip_model" json:"clip_model"` // Clip model to use
- ClipSubFolder string `yaml:"clip_subfolder" json:"clip_subfolder"` // Subfolder to use for clip model
- ControlNet string `yaml:"control_net" json:"control_net"`
-}
-
-// LLMConfig is a struct that holds the configuration that are
-// generic for most of the LLM backends.
+ CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`
+ PipelineType string `yaml:"pipeline_type,omitempty" json:"pipeline_type,omitempty"`
+ SchedulerType string `yaml:"scheduler_type,omitempty" json:"scheduler_type,omitempty"`
+ EnableParameters string `yaml:"enable_parameters,omitempty" json:"enable_parameters,omitempty"` // A list of comma separated parameters to specify
+ IMG2IMG bool `yaml:"img2img,omitempty" json:"img2img,omitempty"` // Image to Image Diffuser
+ ClipSkip int `yaml:"clip_skip,omitempty" json:"clip_skip,omitempty"` // Skip every N frames
+ ClipModel string `yaml:"clip_model,omitempty" json:"clip_model,omitempty"` // Clip model to use
+ ClipSubFolder string `yaml:"clip_subfolder,omitempty" json:"clip_subfolder,omitempty"` // Subfolder to use for clip model
+ ControlNet string `yaml:"control_net,omitempty" json:"control_net,omitempty"`
+}
+
+// @Description LLMConfig is a struct that holds the configuration that is generic for most of the LLM backends.
type LLMConfig struct {
- SystemPrompt string `yaml:"system_prompt" json:"system_prompt"`
- TensorSplit string `yaml:"tensor_split" json:"tensor_split"`
- MainGPU string `yaml:"main_gpu" json:"main_gpu"`
- RMSNormEps float32 `yaml:"rms_norm_eps" json:"rms_norm_eps"`
- NGQA int32 `yaml:"ngqa" json:"ngqa"`
- PromptCachePath string `yaml:"prompt_cache_path" json:"prompt_cache_path"`
- PromptCacheAll bool `yaml:"prompt_cache_all" json:"prompt_cache_all"`
- PromptCacheRO bool `yaml:"prompt_cache_ro" json:"prompt_cache_ro"`
- MirostatETA *float64 `yaml:"mirostat_eta" json:"mirostat_eta"`
- MirostatTAU *float64 `yaml:"mirostat_tau" json:"mirostat_tau"`
- Mirostat *int `yaml:"mirostat" json:"mirostat"`
- NGPULayers *int `yaml:"gpu_layers" json:"gpu_layers"`
- MMap *bool `yaml:"mmap" json:"mmap"`
- MMlock *bool `yaml:"mmlock" json:"mmlock"`
- LowVRAM *bool `yaml:"low_vram" json:"low_vram"`
- Reranking *bool `yaml:"reranking" json:"reranking"`
- Grammar string `yaml:"grammar" json:"grammar"`
- StopWords []string `yaml:"stopwords" json:"stopwords"`
- Cutstrings []string `yaml:"cutstrings" json:"cutstrings"`
- ExtractRegex []string `yaml:"extract_regex" json:"extract_regex"`
- TrimSpace []string `yaml:"trimspace" json:"trimspace"`
- TrimSuffix []string `yaml:"trimsuffix" json:"trimsuffix"`
-
- ContextSize *int `yaml:"context_size" json:"context_size"`
- NUMA bool `yaml:"numa" json:"numa"`
- LoraAdapter string `yaml:"lora_adapter" json:"lora_adapter"`
- LoraBase string `yaml:"lora_base" json:"lora_base"`
- LoraAdapters []string `yaml:"lora_adapters" json:"lora_adapters"`
- LoraScales []float32 `yaml:"lora_scales" json:"lora_scales"`
- LoraScale float32 `yaml:"lora_scale" json:"lora_scale"`
- NoMulMatQ bool `yaml:"no_mulmatq" json:"no_mulmatq"`
- DraftModel string `yaml:"draft_model" json:"draft_model"`
- NDraft int32 `yaml:"n_draft" json:"n_draft"`
- Quantization string `yaml:"quantization" json:"quantization"`
- LoadFormat string `yaml:"load_format" json:"load_format"`
- GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization" json:"gpu_memory_utilization"` // vLLM
- TrustRemoteCode bool `yaml:"trust_remote_code" json:"trust_remote_code"` // vLLM
- EnforceEager bool `yaml:"enforce_eager" json:"enforce_eager"` // vLLM
- SwapSpace int `yaml:"swap_space" json:"swap_space"` // vLLM
- MaxModelLen int `yaml:"max_model_len" json:"max_model_len"` // vLLM
- TensorParallelSize int `yaml:"tensor_parallel_size" json:"tensor_parallel_size"` // vLLM
- DisableLogStatus bool `yaml:"disable_log_stats" json:"disable_log_stats"` // vLLM
- DType string `yaml:"dtype" json:"dtype"` // vLLM
- LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt" json:"limit_mm_per_prompt"` // vLLM
- MMProj string `yaml:"mmproj" json:"mmproj"`
-
- FlashAttention *string `yaml:"flash_attention" json:"flash_attention"`
- NoKVOffloading bool `yaml:"no_kv_offloading" json:"no_kv_offloading"`
- CacheTypeK string `yaml:"cache_type_k" json:"cache_type_k"`
- CacheTypeV string `yaml:"cache_type_v" json:"cache_type_v"`
-
- RopeScaling string `yaml:"rope_scaling" json:"rope_scaling"`
- ModelType string `yaml:"type" json:"type"`
-
- YarnExtFactor float32 `yaml:"yarn_ext_factor" json:"yarn_ext_factor"`
- YarnAttnFactor float32 `yaml:"yarn_attn_factor" json:"yarn_attn_factor"`
- YarnBetaFast float32 `yaml:"yarn_beta_fast" json:"yarn_beta_fast"`
- YarnBetaSlow float32 `yaml:"yarn_beta_slow" json:"yarn_beta_slow"`
-
- CFGScale float32 `yaml:"cfg_scale" json:"cfg_scale"` // Classifier-Free Guidance Scale
-}
-
-// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
+ SystemPrompt string `yaml:"system_prompt,omitempty" json:"system_prompt,omitempty"`
+ TensorSplit string `yaml:"tensor_split,omitempty" json:"tensor_split,omitempty"`
+ MainGPU string `yaml:"main_gpu,omitempty" json:"main_gpu,omitempty"`
+ RMSNormEps float32 `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"`
+ NGQA int32 `yaml:"ngqa,omitempty" json:"ngqa,omitempty"`
+ PromptCachePath string `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"`
+ PromptCacheAll bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
+ PromptCacheRO bool `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"`
+ MirostatETA *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"`
+ MirostatTAU *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"`
+ Mirostat *int `yaml:"mirostat,omitempty" json:"mirostat,omitempty"`
+ NGPULayers *int `yaml:"gpu_layers,omitempty" json:"gpu_layers,omitempty"`
+ MMap *bool `yaml:"mmap,omitempty" json:"mmap,omitempty"`
+ MMlock *bool `yaml:"mmlock,omitempty" json:"mmlock,omitempty"`
+ LowVRAM *bool `yaml:"low_vram,omitempty" json:"low_vram,omitempty"`
+ Reranking *bool `yaml:"reranking,omitempty" json:"reranking,omitempty"`
+ Grammar string `yaml:"grammar,omitempty" json:"grammar,omitempty"`
+ StopWords []string `yaml:"stopwords,omitempty" json:"stopwords,omitempty"`
+ Cutstrings []string `yaml:"cutstrings,omitempty" json:"cutstrings,omitempty"`
+ ExtractRegex []string `yaml:"extract_regex,omitempty" json:"extract_regex,omitempty"`
+ TrimSpace []string `yaml:"trimspace,omitempty" json:"trimspace,omitempty"`
+ TrimSuffix []string `yaml:"trimsuffix,omitempty" json:"trimsuffix,omitempty"`
+
+ ContextSize *int `yaml:"context_size,omitempty" json:"context_size,omitempty"`
+ NUMA bool `yaml:"numa,omitempty" json:"numa,omitempty"`
+ LoraAdapter string `yaml:"lora_adapter,omitempty" json:"lora_adapter,omitempty"`
+ LoraBase string `yaml:"lora_base,omitempty" json:"lora_base,omitempty"`
+ LoraAdapters []string `yaml:"lora_adapters,omitempty" json:"lora_adapters,omitempty"`
+ LoraScales []float32 `yaml:"lora_scales,omitempty" json:"lora_scales,omitempty"`
+ LoraScale float32 `yaml:"lora_scale,omitempty" json:"lora_scale,omitempty"`
+ NoMulMatQ bool `yaml:"no_mulmatq,omitempty" json:"no_mulmatq,omitempty"`
+ DraftModel string `yaml:"draft_model,omitempty" json:"draft_model,omitempty"`
+ NDraft int32 `yaml:"n_draft,omitempty" json:"n_draft,omitempty"`
+ Quantization string `yaml:"quantization,omitempty" json:"quantization,omitempty"`
+ LoadFormat string `yaml:"load_format,omitempty" json:"load_format,omitempty"`
+ GPUMemoryUtilization float32 `yaml:"gpu_memory_utilization,omitempty" json:"gpu_memory_utilization,omitempty"` // vLLM
+ TrustRemoteCode bool `yaml:"trust_remote_code,omitempty" json:"trust_remote_code,omitempty"` // vLLM
+ EnforceEager bool `yaml:"enforce_eager,omitempty" json:"enforce_eager,omitempty"` // vLLM
+ SwapSpace int `yaml:"swap_space,omitempty" json:"swap_space,omitempty"` // vLLM
+ MaxModelLen int `yaml:"max_model_len,omitempty" json:"max_model_len,omitempty"` // vLLM
+ TensorParallelSize int `yaml:"tensor_parallel_size,omitempty" json:"tensor_parallel_size,omitempty"` // vLLM
+ DisableLogStatus bool `yaml:"disable_log_stats,omitempty" json:"disable_log_stats,omitempty"` // vLLM
+ DType string `yaml:"dtype,omitempty" json:"dtype,omitempty"` // vLLM
+ LimitMMPerPrompt LimitMMPerPrompt `yaml:"limit_mm_per_prompt,omitempty" json:"limit_mm_per_prompt,omitempty"` // vLLM
+ MMProj string `yaml:"mmproj,omitempty" json:"mmproj,omitempty"`
+
+ FlashAttention *string `yaml:"flash_attention,omitempty" json:"flash_attention,omitempty"`
+ NoKVOffloading bool `yaml:"no_kv_offloading,omitempty" json:"no_kv_offloading,omitempty"`
+ CacheTypeK string `yaml:"cache_type_k,omitempty" json:"cache_type_k,omitempty"`
+ CacheTypeV string `yaml:"cache_type_v,omitempty" json:"cache_type_v,omitempty"`
+
+ RopeScaling string `yaml:"rope_scaling,omitempty" json:"rope_scaling,omitempty"`
+ ModelType string `yaml:"type,omitempty" json:"type,omitempty"`
+
+ YarnExtFactor float32 `yaml:"yarn_ext_factor,omitempty" json:"yarn_ext_factor,omitempty"`
+ YarnAttnFactor float32 `yaml:"yarn_attn_factor,omitempty" json:"yarn_attn_factor,omitempty"`
+ YarnBetaFast float32 `yaml:"yarn_beta_fast,omitempty" json:"yarn_beta_fast,omitempty"`
+ YarnBetaSlow float32 `yaml:"yarn_beta_slow,omitempty" json:"yarn_beta_slow,omitempty"`
+
+ CFGScale float32 `yaml:"cfg_scale,omitempty" json:"cfg_scale,omitempty"` // Classifier-Free Guidance Scale
+}
+
+// @Description LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
type LimitMMPerPrompt struct {
- LimitImagePerPrompt int `yaml:"image" json:"image"`
- LimitVideoPerPrompt int `yaml:"video" json:"video"`
- LimitAudioPerPrompt int `yaml:"audio" json:"audio"`
+ LimitImagePerPrompt int `yaml:"image,omitempty" json:"image,omitempty"`
+ LimitVideoPerPrompt int `yaml:"video,omitempty" json:"video,omitempty"`
+ LimitAudioPerPrompt int `yaml:"audio,omitempty" json:"audio,omitempty"`
}
-// TemplateConfig is a struct that holds the configuration of the templating system
+// @Description TemplateConfig is a struct that holds the configuration of the templating system
type TemplateConfig struct {
// Chat is the template used in the chat completion endpoint
- Chat string `yaml:"chat" json:"chat"`
+ Chat string `yaml:"chat,omitempty" json:"chat,omitempty"`
// ChatMessage is the template used for chat messages
- ChatMessage string `yaml:"chat_message" json:"chat_message"`
+ ChatMessage string `yaml:"chat_message,omitempty" json:"chat_message,omitempty"`
// Completion is the template used for completion requests
- Completion string `yaml:"completion" json:"completion"`
+ Completion string `yaml:"completion,omitempty" json:"completion,omitempty"`
// Edit is the template used for edit completion requests
- Edit string `yaml:"edit" json:"edit"`
+ Edit string `yaml:"edit,omitempty" json:"edit,omitempty"`
// Functions is the template used when tools are present in the client requests
- Functions string `yaml:"function" json:"function"`
+ Functions string `yaml:"function,omitempty" json:"function,omitempty"`
// UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used.
// Note: this is mostly consumed for backends such as vllm and transformers
// that can use the tokenizers specified in the JSON config files of the models
- UseTokenizerTemplate bool `yaml:"use_tokenizer_template" json:"use_tokenizer_template"`
+ UseTokenizerTemplate bool `yaml:"use_tokenizer_template,omitempty" json:"use_tokenizer_template,omitempty"`
// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
// It defaults to \n
- JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character" json:"join_chat_messages_by_character"`
+ JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character,omitempty" json:"join_chat_messages_by_character,omitempty"`
- Multimodal string `yaml:"multimodal" json:"multimodal"`
+ Multimodal string `yaml:"multimodal,omitempty" json:"multimodal,omitempty"`
- ReplyPrefix string `yaml:"reply_prefix" json:"reply_prefix"`
+ ReplyPrefix string `yaml:"reply_prefix,omitempty" json:"reply_prefix,omitempty"`
}
func (c *ModelConfig) syncKnownUsecasesFromString() {
diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index a75c7ab160af..40345ba50b14 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -1,50 +1,51 @@
package schema
+// @Description PredictionOptions contains prediction parameters for model inference
type PredictionOptions struct {
// Also part of the OpenAI official spec
BasicModelRequest `yaml:",inline"`
// Also part of the OpenAI official spec
- Language string `json:"language"`
+ Language string `json:"language,omitempty" yaml:"language,omitempty"`
// Only for audio transcription
- Translate bool `json:"translate"`
+ Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"`
// Also part of the OpenAI official spec. use it for returning multiple results
- N int `json:"n"`
+ N int `json:"n,omitempty" yaml:"n,omitempty"`
// Common options between all the API calls, part of the OpenAI spec
- TopP *float64 `json:"top_p" yaml:"top_p"`
- TopK *int `json:"top_k" yaml:"top_k"`
- Temperature *float64 `json:"temperature" yaml:"temperature"`
- Maxtokens *int `json:"max_tokens" yaml:"max_tokens"`
- Echo bool `json:"echo"`
+ TopP *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
+ TopK *int `json:"top_k,omitempty" yaml:"top_k,omitempty"`
+ Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
+ Maxtokens *int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
+ Echo bool `json:"echo,omitempty" yaml:"echo,omitempty"`
// Custom parameters - not present in the OpenAI API
- Batch int `json:"batch" yaml:"batch"`
- IgnoreEOS bool `json:"ignore_eos" yaml:"ignore_eos"`
- RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
+ Batch int `json:"batch,omitempty" yaml:"batch,omitempty"`
+ IgnoreEOS bool `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"`
+ RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"`
- RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
+ RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"`
- Keep int `json:"n_keep" yaml:"n_keep"`
+ Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"`
- FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
- PresencePenalty float64 `json:"presence_penalty" yaml:"presence_penalty"`
- TFZ *float64 `json:"tfz" yaml:"tfz"`
+ FrequencyPenalty float64 `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
+ PresencePenalty float64 `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
+ TFZ *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"`
- TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
- Seed *int `json:"seed" yaml:"seed"`
+ TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"`
+ Seed *int `json:"seed,omitempty" yaml:"seed,omitempty"`
- NegativePrompt string `json:"negative_prompt" yaml:"negative_prompt"`
- RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
- RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
- NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
+ NegativePrompt string `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"`
+ RopeFreqBase float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"`
+ RopeFreqScale float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"`
+ NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"`
// Diffusers
- ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
+ ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"`
// RWKV (?)
- Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
+ Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
}
diff --git a/core/schema/request.go b/core/schema/request.go
index f55f39200ca4..8998a2736950 100644
--- a/core/schema/request.go
+++ b/core/schema/request.go
@@ -5,8 +5,9 @@ type LocalAIRequest interface {
ModelName(*string) string
}
+// @Description BasicModelRequest contains the basic model request fields
type BasicModelRequest struct {
- Model string `json:"model" yaml:"model"`
+ Model string `json:"model,omitempty" yaml:"model,omitempty"`
// TODO: Should this also include the following fields from the OpenAI side of the world?
// If so, changes should be made to core/http/middleware/request.go to match
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index 49c4970a7609..66a1e81ca8ab 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -13,99 +13,102 @@ import (
"github.com/rs/zerolog/log"
)
+// @Description GrammarConfig contains configuration for grammar parsing
type GrammarConfig struct {
// ParallelCalls enables the LLM to return multiple function calls in the same response
- ParallelCalls bool `yaml:"parallel_calls"`
+ ParallelCalls bool `yaml:"parallel_calls,omitempty" json:"parallel_calls,omitempty"`
- DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"`
+ DisableParallelNewLines bool `yaml:"disable_parallel_new_lines,omitempty" json:"disable_parallel_new_lines,omitempty"`
// MixedMode enables the LLM to return strings and not only JSON objects
// This is useful for models to not constraining returning only JSON and also messages back to the user
- MixedMode bool `yaml:"mixed_mode"`
+ MixedMode bool `yaml:"mixed_mode,omitempty" json:"mixed_mode,omitempty"`
// NoMixedFreeString disables the mixed mode for free strings
// In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects.
// For example, if enabled the LLM or returns a JSON object or a free string, but not a mix of both
// If disabled(default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but its not going to be strict
- NoMixedFreeString bool `yaml:"no_mixed_free_string"`
+ NoMixedFreeString bool `yaml:"no_mixed_free_string,omitempty" json:"no_mixed_free_string,omitempty"`
// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
- NoGrammar bool `yaml:"disable"`
+ NoGrammar bool `yaml:"disable,omitempty" json:"disable,omitempty"`
// Prefix is the suffix to append to the grammar when being generated
// This is useful when models prepend a tag before returning JSON
- Prefix string `yaml:"prefix"`
+ Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
// ExpectStringsAfterJSON enables mixed string suffix
- ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
+ ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json,omitempty" json:"expect_strings_after_json,omitempty"`
// PropOrder selects what order to print properties
// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
- PropOrder string `yaml:"properties_order"`
+ PropOrder string `yaml:"properties_order,omitempty" json:"properties_order,omitempty"`
// SchemaType can be configured to use a specific schema type to force the grammar
// available : json, llama3.1
- SchemaType string `yaml:"schema_type"`
+ SchemaType string `yaml:"schema_type,omitempty" json:"schema_type,omitempty"`
- GrammarTriggers []GrammarTrigger `yaml:"triggers"`
+ GrammarTriggers []GrammarTrigger `yaml:"triggers,omitempty" json:"triggers,omitempty"`
}
+// @Description GrammarTrigger defines a trigger word for grammar parsing
type GrammarTrigger struct {
// Trigger is the string that triggers the grammar
- Word string `yaml:"word"`
+ Word string `yaml:"word,omitempty" json:"word,omitempty"`
}
-// FunctionsConfig is the configuration for the tool/function call.
+// @Description FunctionsConfig is the configuration for the tool/function call.
// It includes setting to map the function name and arguments from the response
// and, for instance, also if processing the requests with BNF grammars.
type FunctionsConfig struct {
// DisableNoAction disables the "no action" tool
// By default we inject a tool that does nothing and is used to return an answer from the LLM
- DisableNoAction bool `yaml:"disable_no_action"`
+ DisableNoAction bool `yaml:"disable_no_action,omitempty" json:"disable_no_action,omitempty"`
// Grammar is the configuration for the grammar
- GrammarConfig GrammarConfig `yaml:"grammar"`
+ GrammarConfig GrammarConfig `yaml:"grammar,omitempty" json:"grammar,omitempty"`
// NoActionFunctionName is the name of the function that does nothing. It defaults to "answer"
- NoActionFunctionName string `yaml:"no_action_function_name"`
+ NoActionFunctionName string `yaml:"no_action_function_name,omitempty" json:"no_action_function_name,omitempty"`
// NoActionDescriptionName is the name of the function that returns the description of the no action function
- NoActionDescriptionName string `yaml:"no_action_description_name"`
+ NoActionDescriptionName string `yaml:"no_action_description_name,omitempty" json:"no_action_description_name,omitempty"`
// ResponseRegex is a named regex to extract the function name and arguments from the response
- ResponseRegex []string `yaml:"response_regex"`
+ ResponseRegex []string `yaml:"response_regex,omitempty" json:"response_regex,omitempty"`
// JSONRegexMatch is a regex to extract the JSON object from the response
- JSONRegexMatch []string `yaml:"json_regex_match"`
+ JSONRegexMatch []string `yaml:"json_regex_match,omitempty" json:"json_regex_match,omitempty"`
// ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments.
- ArgumentRegex []string `yaml:"argument_regex"`
+ ArgumentRegex []string `yaml:"argument_regex,omitempty" json:"argument_regex,omitempty"`
// ArgumentRegex named regex names for key and value extractions. default: key and value
- ArgumentRegexKey string `yaml:"argument_regex_key_name"` // default: key
- ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value
+ ArgumentRegexKey string `yaml:"argument_regex_key_name,omitempty" json:"argument_regex_key_name,omitempty"` // default: key
+ ArgumentRegexValue string `yaml:"argument_regex_value_name,omitempty" json:"argument_regex_value_name,omitempty"` // default: value
// ReplaceFunctionResults allow to replace strings in the results before parsing them
- ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"`
+ ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results,omitempty" json:"replace_function_results,omitempty"`
// ReplaceLLMResult allow to replace strings in the results before parsing them
- ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"`
+ ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results,omitempty" json:"replace_llm_results,omitempty"`
// CaptureLLMResult is a regex to extract a string from the LLM response
// that is used as return string when using tools.
// This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back
- CaptureLLMResult []string `yaml:"capture_llm_results"`
+ CaptureLLMResult []string `yaml:"capture_llm_results,omitempty" json:"capture_llm_results,omitempty"`
// FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
// This might be useful for certain models trained with the function name as the first token.
- FunctionNameKey string `yaml:"function_name_key"`
- FunctionArgumentsKey string `yaml:"function_arguments_key"`
+ FunctionNameKey string `yaml:"function_name_key,omitempty" json:"function_name_key,omitempty"`
+ FunctionArgumentsKey string `yaml:"function_arguments_key,omitempty" json:"function_arguments_key,omitempty"`
}
+// @Description ReplaceResult defines a key-value replacement for function results
type ReplaceResult struct {
- Key string `yaml:"key"`
- Value string `yaml:"value"`
+ Key string `yaml:"key,omitempty" json:"key,omitempty"`
+ Value string `yaml:"value,omitempty" json:"value,omitempty"`
}
type FuncCallResults struct {
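The practical effect of patch 07 is that zero-valued fields stop appearing in serialized configs. A tiny self-contained illustration of the omitempty behavior, using a toy struct rather than the real ModelConfig:

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// toyConfig stands in for the config structs above; only the tags matter here.
type toyConfig struct {
	Name    string `yaml:"name,omitempty" json:"name,omitempty"`
	Backend string `yaml:"backend,omitempty" json:"backend,omitempty"`
	Step    int    `yaml:"step,omitempty" json:"step,omitempty"`
}

func main() {
	out, err := yaml.Marshal(toyConfig{Name: "phi-2"})
	if err != nil {
		panic(err)
	}
	fmt.Print(string(out)) // prints "name: phi-2" only; Backend and Step are dropped
}

One caveat: omitempty cannot distinguish "unset" from a meaningful zero, which is why fields such as F16, Threads and ContextSize remain pointers in the diff above.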
From 2f4f8bd46da3bdeab804ea4ec39ae918dedf7d89 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 09:38:42 +0100
Subject: [PATCH 08/16] Fix tests
Signed-off-by: Ettore Di Giacinto
---
pkg/huggingface-api/client_test.go | 56 +++++++++++++++++++++---------
1 file changed, 39 insertions(+), 17 deletions(-)
diff --git a/pkg/huggingface-api/client_test.go b/pkg/huggingface-api/client_test.go
index 37a381416125..ffa35bb90a94 100644
--- a/pkg/huggingface-api/client_test.go
+++ b/pkg/huggingface-api/client_test.go
@@ -270,6 +270,15 @@ var _ = Describe("HuggingFace API Client", func() {
})
})
+ Context("when getting file SHA on remote model", func() {
+ It("should get file SHA successfully", func() {
+ sha, err := client.GetFileSHA(
+ "mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF", "localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf")
+ Expect(err).ToNot(HaveOccurred())
+ Expect(sha).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"))
+ })
+ })
+
Context("when listing files", func() {
BeforeEach(func() {
mockFilesResponse := `[
@@ -329,23 +338,25 @@ var _ = Describe("HuggingFace API Client", func() {
Context("when getting file SHA", func() {
BeforeEach(func() {
- mockFileInfoResponse := `{
- "path": "model-Q4_K_M.gguf",
- "size": 1000000,
- "oid": "abc123",
- "lfs": {
- "oid": "sha256:def456",
+ mockFilesResponse := `[
+ {
+ "type": "file",
+ "path": "model-Q4_K_M.gguf",
"size": 1000000,
- "pointer": "version https://git-lfs.github.com/spec/v1",
- "sha256": "def456789"
+ "oid": "abc123",
+ "lfs": {
+ "oid": "def456789",
+ "size": 1000000,
+ "pointerSize": 135
+ }
}
- }`
+ ]`
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- if strings.Contains(r.URL.Path, "/paths-info") {
+ if strings.Contains(r.URL.Path, "/tree/main") {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
- w.Write([]byte(mockFileInfoResponse))
+ w.Write([]byte(mockFilesResponse))
} else {
w.WriteHeader(http.StatusNotFound)
}
@@ -363,18 +374,29 @@ var _ = Describe("HuggingFace API Client", func() {
It("should handle missing SHA gracefully", func() {
server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- w.Header().Set("Content-Type", "application/json")
- w.WriteHeader(http.StatusOK)
- w.Write([]byte(`{"path": "file.txt", "size": 100}`))
+ if strings.Contains(r.URL.Path, "/tree/main") {
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(http.StatusOK)
+ w.Write([]byte(`[
+ {
+ "type": "file",
+ "path": "file.txt",
+ "size": 100,
+ "oid": "file123"
+ }
+ ]`))
+ } else {
+ w.WriteHeader(http.StatusNotFound)
+ }
}))
client.SetBaseURL(server.URL)
sha, err := client.GetFileSHA("test/model", "file.txt")
- Expect(err).To(HaveOccurred())
- Expect(err.Error()).To(ContainSubstring("no SHA256 found"))
- Expect(sha).To(Equal(""))
+ Expect(err).ToNot(HaveOccurred())
+ // When there's no LFS, it should return the OID
+ Expect(sha).To(Equal("file123"))
})
})
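The reworked mocks above imply how GetFileSHA now resolves a checksum: it lists the repository tree via the /tree/main endpoint, prefers the LFS oid when present, and falls back to the plain git oid otherwise. A sketch of that selection logic follows; the treeFile type mirrors the mocked JSON and is an assumption about the client's internals, not a copy of them.

package hfapi

import "fmt"

// treeFile mirrors the JSON entries returned by the mocked /tree/main endpoint.
type treeFile struct {
	Type string `json:"type"`
	Path string `json:"path"`
	Oid  string `json:"oid"`
	LFS  *struct {
		Oid string `json:"oid"`
	} `json:"lfs,omitempty"`
}

// shaForFile picks the checksum the tests expect: LFS oid first, git oid as fallback.
func shaForFile(files []treeFile, name string) (string, error) {
	for _, f := range files {
		if f.Type != "file" || f.Path != name {
			continue
		}
		if f.LFS != nil {
			return f.LFS.Oid, nil // LFS entries carry the content sha256
		}
		return f.Oid, nil // non-LFS entries only have the git object id
	}
	return "", fmt.Errorf("file %q not found in tree", name)
}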
From 8937998c744dc752af88da850e0605e2055ef3bb Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 09:44:49 +0100
Subject: [PATCH 09/16] Add MLX importer
Signed-off-by: Ettore Di Giacinto
---
core/gallery/importers/importers.go | 1 +
core/gallery/importers/mlx.go | 94 +++++++++++++++++++++++++++++
2 files changed, 95 insertions(+)
create mode 100644 core/gallery/importers/mlx.go
diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
index 5236af0cc245..532d8e68c9f5 100644
--- a/core/gallery/importers/importers.go
+++ b/core/gallery/importers/importers.go
@@ -7,6 +7,7 @@ import (
var DefaultImporters = []Importer{
&LlamaCPPImporter{},
+ &MLXImporter{},
}
type Importer interface {
diff --git a/core/gallery/importers/mlx.go b/core/gallery/importers/mlx.go
new file mode 100644
index 000000000000..a8e183791873
--- /dev/null
+++ b/core/gallery/importers/mlx.go
@@ -0,0 +1,94 @@
+package importers
+
+import (
+ "encoding/json"
+ "path/filepath"
+ "strings"
+
+ "github.com/mudler/LocalAI/core/config"
+ "github.com/mudler/LocalAI/core/gallery"
+ "github.com/mudler/LocalAI/core/schema"
+ "go.yaml.in/yaml/v2"
+)
+
+var _ Importer = &MLXImporter{}
+
+type MLXImporter struct{}
+
+func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool {
+ preferences, err := request.Preferences.MarshalJSON()
+ if err != nil {
+ return false
+ }
+ preferencesMap := make(map[string]any)
+ err = json.Unmarshal(preferences, &preferencesMap)
+ if err != nil {
+ return false
+ }
+
+ b, ok := preferencesMap["backend"].(string)
+ if ok && b == "mlx" || b == "mlx-vlm" {
+ return true
+ }
+
+ // All https://huggingface.co/mlx-community/*
+ if strings.Contains(uri, "mlx-community/") {
+ return true
+ }
+
+ return false
+}
+
+func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
+ preferences, err := request.Preferences.MarshalJSON()
+ if err != nil {
+ return gallery.ModelConfig{}, err
+ }
+ preferencesMap := make(map[string]any)
+ err = json.Unmarshal(preferences, &preferencesMap)
+ if err != nil {
+ return gallery.ModelConfig{}, err
+ }
+
+ name, ok := preferencesMap["name"].(string)
+ if !ok {
+ name = filepath.Base(uri)
+ }
+
+ description, ok := preferencesMap["description"].(string)
+ if !ok {
+ description = "Imported from " + uri
+ }
+
+ backend := "mlx"
+ b, ok := preferencesMap["backend"].(string)
+ if ok {
+ backend = b
+ }
+
+ modelConfig := config.ModelConfig{
+ Name: name,
+ Description: description,
+ KnownUsecaseStrings: []string{"chat"},
+ Backend: backend,
+ PredictionOptions: schema.PredictionOptions{
+ BasicModelRequest: schema.BasicModelRequest{
+ Model: uri,
+ },
+ },
+ TemplateConfig: config.TemplateConfig{
+ UseTokenizerTemplate: true,
+ },
+ }
+
+ data, err := yaml.Marshal(modelConfig)
+ if err != nil {
+ return gallery.ModelConfig{}, err
+ }
+
+ return gallery.ModelConfig{
+ Name: name,
+ Description: description,
+ ConfigFile: string(data),
+ }, nil
+}
From dc44c57c80e91f6073807eac5b865578479064ec Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 12:53:02 +0100
Subject: [PATCH 10/16] Small refactors to start using HF for discovery
Signed-off-by: Ettore Di Giacinto
---
core/gallery/importers/importers.go | 47 +++++++++++++++++++--
core/gallery/importers/llama-cpp.go | 20 ++++-----
core/gallery/importers/mlx.go | 16 +++----
core/http/endpoints/localai/import_model.go | 14 ++----
core/startup/model_preload.go | 25 +++++------
5 files changed, 75 insertions(+), 47 deletions(-)
diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
index 532d8e68c9f5..f87bc5b81a8e 100644
--- a/core/gallery/importers/importers.go
+++ b/core/gallery/importers/importers.go
@@ -1,8 +1,12 @@
package importers
import (
+ "encoding/json"
+
+ "github.com/rs/zerolog/log"
+
"github.com/mudler/LocalAI/core/gallery"
- "github.com/mudler/LocalAI/core/schema"
+ hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
)
var DefaultImporters = []Importer{
@@ -10,7 +14,44 @@ var DefaultImporters = []Importer{
&MLXImporter{},
}
+type Details struct {
+ HuggingFace *hfapi.ModelDetails
+ URI string
+ Preferences json.RawMessage
+}
+
type Importer interface {
- Match(uri string, request schema.ImportModelRequest) bool
- Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
+ Match(details Details) bool
+ Import(details Details) (gallery.ModelConfig, error)
+}
+
+func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) {
+ var err error
+ var modelConfig gallery.ModelConfig
+
+ hf := hfapi.NewClient()
+
+ hfDetails, err := hf.GetModelDetails(uri)
+ if err != nil {
+ // maybe not a HF repository
+ // TODO: maybe we can check if the URI is a valid HF repository
+ log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
+ }
+
+ details := Details{
+ HuggingFace: hfDetails,
+ URI: uri,
+ Preferences: preferences,
+ }
+
+ for _, importer := range DefaultImporters {
+ if importer.Match(details) {
+ modelConfig, err = importer.Import(details)
+ if err != nil {
+ continue
+ }
+ break
+ }
+ }
+ return modelConfig, err
}
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index 3596c868f50a..d84500fa19e6 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -16,8 +16,8 @@ var _ Importer = &LlamaCPPImporter{}
type LlamaCPPImporter struct{}
-func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool {
- preferences, err := request.Preferences.MarshalJSON()
+func (i *LlamaCPPImporter) Match(details Details) bool {
+ preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return false
}
@@ -31,11 +31,11 @@ func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest)
return true
}
- return strings.HasSuffix(uri, ".gguf")
+ return strings.HasSuffix(details.URI, ".gguf")
}
-func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
- preferences, err := request.Preferences.MarshalJSON()
+func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
+ preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
@@ -47,12 +47,12 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest)
name, ok := preferencesMap["name"].(string)
if !ok {
- name = filepath.Base(uri)
+ name = filepath.Base(details.URI)
}
description, ok := preferencesMap["description"].(string)
if !ok {
- description = "Imported from " + uri
+ description = "Imported from " + details.URI
}
modelConfig := config.ModelConfig{
@@ -62,7 +62,7 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest)
Backend: "llama-cpp",
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
- Model: filepath.Base(uri),
+ Model: filepath.Base(details.URI),
},
},
TemplateConfig: config.TemplateConfig{
@@ -86,8 +86,8 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest)
ConfigFile: string(data),
Files: []gallery.File{
{
- URI: uri,
- Filename: filepath.Base(uri),
+ URI: details.URI,
+ Filename: filepath.Base(details.URI),
},
},
}, nil
diff --git a/core/gallery/importers/mlx.go b/core/gallery/importers/mlx.go
index a8e183791873..faa28846f4ea 100644
--- a/core/gallery/importers/mlx.go
+++ b/core/gallery/importers/mlx.go
@@ -15,8 +15,8 @@ var _ Importer = &MLXImporter{}
type MLXImporter struct{}
-func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool {
- preferences, err := request.Preferences.MarshalJSON()
+func (i *MLXImporter) Match(details Details) bool {
+ preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return false
}
@@ -32,15 +32,15 @@ func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool
}
// All https://huggingface.co/mlx-community/*
- if strings.Contains(uri, "mlx-community/") {
+ if strings.Contains(details.URI, "mlx-community/") {
return true
}
return false
}
-func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
- preferences, err := request.Preferences.MarshalJSON()
+func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) {
+ preferences, err := details.Preferences.MarshalJSON()
if err != nil {
return gallery.ModelConfig{}, err
}
@@ -52,12 +52,12 @@ func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gal
name, ok := preferencesMap["name"].(string)
if !ok {
- name = filepath.Base(uri)
+ name = filepath.Base(details.URI)
}
description, ok := preferencesMap["description"].(string)
if !ok {
- description = "Imported from " + uri
+ description = "Imported from " + details.URI
}
backend := "mlx"
@@ -73,7 +73,7 @@ func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gal
Backend: backend,
PredictionOptions: schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
- Model: uri,
+ Model: details.URI,
},
},
TemplateConfig: config.TemplateConfig{
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index ea71ae53ae5d..b6d34055390f 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -30,17 +30,9 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
return err
}
- var err error
- var modelConfig gallery.ModelConfig
-
- for _, importer := range importers.DefaultImporters {
- if importer.Match(input.URI, *input) {
- modelConfig, err = importer.Import(input.URI, *input)
- if err != nil {
- continue
- }
- break
- }
+ modelConfig, err := importers.DiscoverModelConfig(input.URI, input.Preferences)
+ if err != nil {
+ return fmt.Errorf("failed to discover model config: %w", err)
}
uuid, err := uuid.NewUUID()
diff --git a/core/startup/model_preload.go b/core/startup/model_preload.go
index 4550a25e46f3..193aad6a2e52 100644
--- a/core/startup/model_preload.go
+++ b/core/startup/model_preload.go
@@ -1,6 +1,7 @@
package startup
import (
+ "encoding/json"
"errors"
"fmt"
"os"
@@ -13,7 +14,6 @@ import (
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/core/gallery/importers"
- "github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/services"
"github.com/mudler/LocalAI/pkg/downloader"
"github.com/mudler/LocalAI/pkg/model"
@@ -165,22 +165,17 @@ func InstallModels(galleryService *services.GalleryService, galleries, backendGa
continue
}
- // TODO: start autoimporter
- var err error
- var modelConfig gallery.ModelConfig
- for _, importer := range importers.DefaultImporters {
- if importer.Match(url, schema.ImportModelRequest{}) {
- modelConfig, err = importer.Import(url, schema.ImportModelRequest{})
- if err != nil {
- continue
- }
- break
- }
+ // TODO: consider making DiscoverModelConfig the default resolution path for all model URIs.
+ modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage("{}"))
+ if discoverErr != nil {
+ err = errors.Join(err, fmt.Errorf("failed to discover model config: %w", discoverErr))
+ continue
}
- uuid, err := uuid.NewUUID()
- if err != nil {
- return err
+ uuid, uuidErr := uuid.NewUUID()
+ if uuidErr != nil {
+ err = errors.Join(err, fmt.Errorf("failed to generate UUID: %w", uuidErr))
+ continue
}
galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
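With patch 10 applied, both the HTTP endpoint and the startup path funnel through DiscoverModelConfig. A minimal caller sketch follows; note that, as written in the diff, the function can return the zero gallery.ModelConfig with a nil error when no importer matches, so checking the returned name is a reasonable extra guard.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/mudler/LocalAI/core/gallery/importers"
)

func main() {
	prefs := json.RawMessage(`{"name": "my-model"}`) // optional hints consumed by the importers
	cfg, err := importers.DiscoverModelConfig("https://example.com/model.gguf", prefs)
	if err != nil {
		panic(fmt.Errorf("failed to discover model config: %w", err))
	}
	if cfg.Name == "" {
		panic("no importer matched the URI")
	}
	fmt.Println(cfg.Name)
}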
From 71f2163459c932b521cdd1fc9eba24fceb6908ef Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 16:05:27 +0100
Subject: [PATCH 11/16] Add tests
Signed-off-by: Ettore Di Giacinto
---
.../gallery/importers/importers_suite_test.go | 13 ++
core/gallery/importers/llama-cpp_test.go | 128 +++++++++++++++
core/gallery/importers/mlx_test.go | 147 ++++++++++++++++++
3 files changed, 288 insertions(+)
create mode 100644 core/gallery/importers/importers_suite_test.go
create mode 100644 core/gallery/importers/llama-cpp_test.go
create mode 100644 core/gallery/importers/mlx_test.go
diff --git a/core/gallery/importers/importers_suite_test.go b/core/gallery/importers/importers_suite_test.go
new file mode 100644
index 000000000000..a65b8163ad56
--- /dev/null
+++ b/core/gallery/importers/importers_suite_test.go
@@ -0,0 +1,13 @@
+package importers_test
+
+import (
+ "testing"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+func TestImporters(t *testing.T) {
+ RegisterFailHandler(Fail)
+ RunSpecs(t, "Importers test suite")
+}
diff --git a/core/gallery/importers/llama-cpp_test.go b/core/gallery/importers/llama-cpp_test.go
new file mode 100644
index 000000000000..19f25e0a1dc6
--- /dev/null
+++ b/core/gallery/importers/llama-cpp_test.go
@@ -0,0 +1,128 @@
+package importers_test
+
+import (
+ "encoding/json"
+
+ "github.com/mudler/LocalAI/core/gallery/importers"
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+var _ = Describe("LlamaCPPImporter", func() {
+ var importer *importers.LlamaCPPImporter
+
+ BeforeEach(func() {
+ importer = &importers.LlamaCPPImporter{}
+ })
+
+ Context("Match", func() {
+ It("should match when URI ends with .gguf", func() {
+ details := importers.Details{
+ URI: "https://example.com/model.gguf",
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeTrue())
+ })
+
+ It("should match when backend preference is llama-cpp", func() {
+ preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
+ details := importers.Details{
+ URI: "https://example.com/model",
+ Preferences: preferences,
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeTrue())
+ })
+
+ It("should not match when URI does not end with .gguf and no backend preference", func() {
+ details := importers.Details{
+ URI: "https://example.com/model.bin",
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeFalse())
+ })
+
+ It("should not match when backend preference is different", func() {
+ preferences := json.RawMessage(`{"backend": "mlx"}`)
+ details := importers.Details{
+ URI: "https://example.com/model",
+ Preferences: preferences,
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeFalse())
+ })
+
+ It("should return false when JSON preferences are invalid", func() {
+ preferences := json.RawMessage(`invalid json`)
+ details := importers.Details{
+ URI: "https://example.com/model.gguf",
+ Preferences: preferences,
+ }
+
+ // Invalid JSON causes Match to return false early
+ result := importer.Match(details)
+ Expect(result).To(BeFalse())
+ })
+ })
+
+ Context("Import", func() {
+ It("should import model config with default name and description", func() {
+ details := importers.Details{
+ URI: "https://example.com/my-model.gguf",
+ }
+
+ modelConfig, err := importer.Import(details)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("my-model.gguf"))
+ Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
+ Expect(modelConfig.Files).To(HaveLen(1))
+ Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"))
+ Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("model: my-model.gguf"))
+ })
+
+ It("should import model config with custom name and description from preferences", func() {
+ preferences := json.RawMessage(`{"name": "custom-model", "description": "Custom description"}`)
+ details := importers.Details{
+ URI: "https://example.com/my-model.gguf",
+ Preferences: preferences,
+ }
+
+ modelConfig, err := importer.Import(details)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("custom-model"))
+ Expect(modelConfig.Description).To(Equal("Custom description"))
+ Expect(modelConfig.Files).To(HaveLen(1))
+ })
+
+ It("should handle invalid JSON preferences", func() {
+ preferences := json.RawMessage(`invalid json`)
+ details := importers.Details{
+ URI: "https://example.com/my-model.gguf",
+ Preferences: preferences,
+ }
+
+ _, err := importer.Import(details)
+ Expect(err).To(HaveOccurred())
+ })
+
+ It("should extract filename correctly from URI with path", func() {
+ details := importers.Details{
+ URI: "https://example.com/path/to/model.gguf",
+ }
+
+ modelConfig, err := importer.Import(details)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("model: model.gguf"))
+ })
+ })
+})
diff --git a/core/gallery/importers/mlx_test.go b/core/gallery/importers/mlx_test.go
new file mode 100644
index 000000000000..82e02aff0b44
--- /dev/null
+++ b/core/gallery/importers/mlx_test.go
@@ -0,0 +1,147 @@
+package importers_test
+
+import (
+ "encoding/json"
+
+ "github.com/mudler/LocalAI/core/gallery/importers"
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+var _ = Describe("MLXImporter", func() {
+ var importer *importers.MLXImporter
+
+ BeforeEach(func() {
+ importer = &importers.MLXImporter{}
+ })
+
+ Context("Match", func() {
+ It("should match when URI contains mlx-community/", func() {
+ details := importers.Details{
+ URI: "https://huggingface.co/mlx-community/test-model",
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeTrue())
+ })
+
+ It("should match when backend preference is mlx", func() {
+ preferences := json.RawMessage(`{"backend": "mlx"}`)
+ details := importers.Details{
+ URI: "https://example.com/model",
+ Preferences: preferences,
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeTrue())
+ })
+
+ It("should match when backend preference is mlx-vlm", func() {
+ preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
+ details := importers.Details{
+ URI: "https://example.com/model",
+ Preferences: preferences,
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeTrue())
+ })
+
+ It("should not match when URI does not contain mlx-community/ and no backend preference", func() {
+ details := importers.Details{
+ URI: "https://huggingface.co/other-org/test-model",
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeFalse())
+ })
+
+ It("should not match when backend preference is different", func() {
+ preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
+ details := importers.Details{
+ URI: "https://example.com/model",
+ Preferences: preferences,
+ }
+
+ result := importer.Match(details)
+ Expect(result).To(BeFalse())
+ })
+
+ It("should return false when JSON preferences are invalid", func() {
+ preferences := json.RawMessage(`invalid json`)
+ details := importers.Details{
+ URI: "https://huggingface.co/mlx-community/test-model",
+ Preferences: preferences,
+ }
+
+ // Invalid JSON causes Match to return false early
+ result := importer.Match(details)
+ Expect(result).To(BeFalse())
+ })
+ })
+
+ Context("Import", func() {
+ It("should import model config with default name and description", func() {
+ details := importers.Details{
+ URI: "https://huggingface.co/mlx-community/test-model",
+ }
+
+ modelConfig, err := importer.Import(details)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("test-model"))
+ Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("model: https://huggingface.co/mlx-community/test-model"))
+ })
+
+ It("should import model config with custom name and description from preferences", func() {
+ preferences := json.RawMessage(`{"name": "custom-mlx-model", "description": "Custom MLX description"}`)
+ details := importers.Details{
+ URI: "https://huggingface.co/mlx-community/test-model",
+ Preferences: preferences,
+ }
+
+ modelConfig, err := importer.Import(details)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("custom-mlx-model"))
+ Expect(modelConfig.Description).To(Equal("Custom MLX description"))
+ })
+
+ It("should use custom backend from preferences", func() {
+ preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
+ details := importers.Details{
+ URI: "https://huggingface.co/mlx-community/test-model",
+ Preferences: preferences,
+ }
+
+ modelConfig, err := importer.Import(details)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
+ })
+
+ It("should handle invalid JSON preferences", func() {
+ preferences := json.RawMessage(`invalid json`)
+ details := importers.Details{
+ URI: "https://huggingface.co/mlx-community/test-model",
+ Preferences: preferences,
+ }
+
+ _, err := importer.Import(details)
+ Expect(err).To(HaveOccurred())
+ })
+
+ It("should extract filename correctly from URI with path", func() {
+ details := importers.Details{
+ URI: "https://huggingface.co/mlx-community/path/to/model",
+ }
+
+ modelConfig, err := importer.Import(details)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("model"))
+ })
+ })
+})
From b8ed4467f0cb6f43338dbb594fea27a781f32bb8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 16:13:06 +0100
Subject: [PATCH 12/16] Common preferences
Signed-off-by: Ettore Di Giacinto
---
core/http/views/model-editor.html | 151 +++++++++++++++++++++++-------
1 file changed, 118 insertions(+), 33 deletions(-)
diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index f8db7cf66fae..68b26360c1b5 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -109,40 +109,107 @@
-
-
-
-
-
- :
-
-
-
-
+
+
+
+ Common Preferences
+
+
+
+
+
+
+
+ Force a specific backend. Leave empty to auto-detect from URI.
+
+
+
+
+
+
+
+
+ Custom name for the model. If empty, the filename will be used.
+
+
+
+
+
+
+
+
+ Custom description for the model. If empty, a default description will be generated.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Add custom key-value pairs for advanced configuration
+
-
- Add key-value pairs to customize the import process
-
@@ -354,6 +421,11 @@
isEditMode: {{if .ModelName}}true{{else}}false{{end}},
importUri: '',
preferences: [],
+ commonPreferences: {
+ backend: '',
+ name: '',
+ description: ''
+ },
isSubmitting: false,
currentJobId: null,
jobPollInterval: null,
@@ -402,11 +474,24 @@
this.isSubmitting = true;
try {
- // Build preferences object
+ // Build preferences object starting with common preferences
const prefsObj = {};
+
+ // Add common preferences (only non-empty values)
+ if (this.commonPreferences.backend && this.commonPreferences.backend.trim()) {
+ prefsObj.backend = this.commonPreferences.backend.trim();
+ }
+ if (this.commonPreferences.name && this.commonPreferences.name.trim()) {
+ prefsObj.name = this.commonPreferences.name.trim();
+ }
+ if (this.commonPreferences.description && this.commonPreferences.description.trim()) {
+ prefsObj.description = this.commonPreferences.description.trim();
+ }
+
+ // Add custom preferences (can override common ones)
this.preferences.forEach(pref => {
if (pref.key && pref.value) {
- prefsObj[pref.key] = pref.value;
+ prefsObj[pref.key.trim()] = pref.value.trim();
}
});
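Server-side, the submitted preferences arrive as one flat JSON object. A minimal Go sketch of the same merge the Alpine.js code performs; the commonPrefs and customPairs variables are invented for illustration, and only the merge order (common fields first, custom pairs override) comes from the patch:

// Sketch of the preference merge; types are illustrative only.
prefs := map[string]string{}
for key, val := range map[string]string{
	"backend":     commonPrefs.Backend,
	"name":        commonPrefs.Name,
	"description": commonPrefs.Description,
} {
	if v := strings.TrimSpace(val); v != "" {
		prefs[key] = v // only non-empty common preferences are sent
	}
}
for _, kv := range customPairs { // custom key-value pairs win over common ones
	if kv.Key != "" && kv.Value != "" {
		prefs[strings.TrimSpace(kv.Key)] = strings.TrimSpace(kv.Value)
	}
}
payload, _ := json.Marshal(prefs) // becomes the Preferences json.RawMessage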
From 9cd627362efb0695d22fbe6635b26d50c970768c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 17:41:04 +0100
Subject: [PATCH 13/16] Add support for bare HF repos
Signed-off-by: Ettore Di Giacinto
---
core/gallery/importers/importers.go | 10 +-
core/gallery/importers/importers_test.go | 173 +++++++++++++++++++++++
core/gallery/importers/llama-cpp.go | 76 +++++++---
core/gallery/importers/llama-cpp_test.go | 17 ++-
core/http/views/model-editor.html | 22 ++-
pkg/huggingface-api/client.go | 7 +
pkg/huggingface-api/client_test.go | 8 ++
7 files changed, 287 insertions(+), 26 deletions(-)
create mode 100644 core/gallery/importers/importers_test.go
diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
index f87bc5b81a8e..0be81ed06d0d 100644
--- a/core/gallery/importers/importers.go
+++ b/core/gallery/importers/importers.go
@@ -2,6 +2,7 @@ package importers
import (
"encoding/json"
+ "strings"
"github.com/rs/zerolog/log"
@@ -31,11 +32,18 @@ func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.Model
hf := hfapi.NewClient()
- hfDetails, err := hf.GetModelDetails(uri)
+ hfrepoID := strings.ReplaceAll(uri, "huggingface://", "")
+ hfrepoID = strings.ReplaceAll(hfrepoID, "hf://", "")
+ hfrepoID = strings.ReplaceAll(hfrepoID, "https://huggingface.co/", "")
+
+ hfDetails, err := hf.GetModelDetails(hfrepoID)
if err != nil {
// maybe not a HF repository
// TODO: maybe we can check if the URI is a valid HF repository
log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
+ } else {
+ log.Debug().Str("uri", uri).Msg("Got model details")
+ log.Debug().Any("details", hfDetails).Msg("Model details")
}
details := Details{
diff --git a/core/gallery/importers/importers_test.go b/core/gallery/importers/importers_test.go
new file mode 100644
index 000000000000..48fb6ac3d46a
--- /dev/null
+++ b/core/gallery/importers/importers_test.go
@@ -0,0 +1,173 @@
+package importers_test
+
+import (
+ "encoding/json"
+ "fmt"
+
+ "github.com/mudler/LocalAI/core/gallery/importers"
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+var _ = Describe("DiscoverModelConfig", func() {
+
+ Context("With only a repository URI", func() {
+ It("should discover and import using LlamaCPPImporter", func() {
+ uri := "https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"
+ preferences := json.RawMessage(`{}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+ Expect(modelConfig.Name).To(Equal("LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].Filename).To(Equal("localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig))
+ })
+ })
+
+ Context("with .gguf URI", func() {
+ It("should discover and import using LlamaCPPImporter", func() {
+ uri := "https://example.com/my-model.gguf"
+ preferences := json.RawMessage(`{}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("my-model.gguf"))
+ Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+ })
+
+ It("should use custom preferences when provided", func() {
+ uri := "https://example.com/my-model.gguf"
+ preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description"}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("custom-name"))
+ Expect(modelConfig.Description).To(Equal("Custom description"))
+ })
+ })
+
+ Context("with mlx-community URI", func() {
+ It("should discover and import using MLXImporter", func() {
+ uri := "https://huggingface.co/mlx-community/test-model"
+ preferences := json.RawMessage(`{}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("test-model"))
+ Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+ })
+
+ It("should use custom preferences when provided", func() {
+ uri := "https://huggingface.co/mlx-community/test-model"
+ preferences := json.RawMessage(`{"name": "custom-mlx", "description": "Custom MLX description"}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("custom-mlx"))
+ Expect(modelConfig.Description).To(Equal("Custom MLX description"))
+ })
+ })
+
+ Context("with backend preference", func() {
+ It("should use llama-cpp backend when specified", func() {
+ uri := "https://example.com/model"
+ preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+ })
+
+ It("should use mlx backend when specified", func() {
+ uri := "https://example.com/model"
+ preferences := json.RawMessage(`{"backend": "mlx"}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+ })
+
+ It("should use mlx-vlm backend when specified", func() {
+ uri := "https://example.com/model"
+ preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
+ })
+ })
+
+ Context("with HuggingFace URI formats", func() {
+ It("should handle huggingface:// prefix", func() {
+ uri := "huggingface://mlx-community/test-model"
+ preferences := json.RawMessage(`{}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("test-model"))
+ })
+
+ It("should handle hf:// prefix", func() {
+ uri := "hf://mlx-community/test-model"
+ preferences := json.RawMessage(`{}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("test-model"))
+ })
+
+ It("should handle https://huggingface.co/ prefix", func() {
+ uri := "https://huggingface.co/mlx-community/test-model"
+ preferences := json.RawMessage(`{}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).ToNot(HaveOccurred())
+ Expect(modelConfig.Name).To(Equal("test-model"))
+ })
+ })
+
+ Context("with invalid or non-matching URI", func() {
+ It("should return error when no importer matches", func() {
+ uri := "https://example.com/unknown-model.bin"
+ preferences := json.RawMessage(`{}`)
+
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ // When no importer matches, DiscoverModelConfig returns an empty config and an error
+ Expect(modelConfig.Name).To(BeEmpty())
+ Expect(err).To(HaveOccurred())
+ })
+ })
+
+ Context("with invalid JSON preferences", func() {
+ It("should return error when JSON is invalid even if URI matches", func() {
+ uri := "https://example.com/model.gguf"
+ preferences := json.RawMessage(`invalid json`)
+
+ // Even though Match() returns true for .gguf extension,
+ // Import() will fail when trying to unmarshal invalid JSON preferences
+ modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+ Expect(err).To(HaveOccurred())
+ Expect(modelConfig.Name).To(BeEmpty())
+ })
+ })
+})
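These specs treat DiscoverModelConfig as a first-match dispatcher over the registered importers. A minimal sketch of that behavior, using the Importer contract sketched earlier; the candidate ordering is an assumption:

// First-match dispatch assumed by the specs above: llama-cpp claims .gguf
// URIs, MLX claims mlx-community repos, and no match yields an error.
func discoverSketch(details importers.Details, candidates []Importer) (gallery.ModelConfig, error) {
	for _, imp := range candidates {
		if imp.Match(details) {
			return imp.Import(details)
		}
	}
	return gallery.ModelConfig{}, fmt.Errorf("no importer matched %q", details.URI)
}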
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index d84500fa19e6..d4c8ff591fdb 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -3,6 +3,7 @@ package importers
import (
"encoding/json"
"path/filepath"
+ "slices"
"strings"
"github.com/mudler/LocalAI/core/config"
@@ -31,7 +32,19 @@ func (i *LlamaCPPImporter) Match(details Details) bool {
return true
}
- return strings.HasSuffix(details.URI, ".gguf")
+ if strings.HasSuffix(details.URI, ".gguf") {
+ return true
+ }
+
+ if details.HuggingFace != nil {
+ for _, file := range details.HuggingFace.Files {
+ if strings.HasSuffix(file.Path, ".gguf") {
+ return true
+ }
+ }
+ }
+
+ return false
}
func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
@@ -55,16 +68,17 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
description = "Imported from " + details.URI
}
+ preferredQuantizations, _ := preferencesMap["quantizations"].(string)
+ quants := []string{"q4_k_m"}
+ if preferredQuantizations != "" {
+ quants = strings.Split(preferredQuantizations, ",")
+ }
+
modelConfig := config.ModelConfig{
Name: name,
Description: description,
KnownUsecaseStrings: []string{"chat"},
Backend: "llama-cpp",
- PredictionOptions: schema.PredictionOptions{
- BasicModelRequest: schema.BasicModelRequest{
- Model: filepath.Base(details.URI),
- },
- },
TemplateConfig: config.TemplateConfig{
UseTokenizerTemplate: true,
},
@@ -75,20 +89,48 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
},
}
+ cfg := gallery.ModelConfig{
+ Name: name,
+ Description: description,
+ }
+
+ if strings.Contains(details.URI, ".gguf") {
+ cfg.Files = append(cfg.Files, gallery.File{
+ URI: details.URI,
+ Filename: filepath.Base(details.URI),
+ })
+ modelConfig.PredictionOptions = schema.PredictionOptions{
+ BasicModelRequest: schema.BasicModelRequest{
+ Model: filepath.Base(details.URI),
+ },
+ }
+ } else if details.HuggingFace != nil {
+ for _, file := range details.HuggingFace.Files {
+ if slices.ContainsFunc(quants, func(quant string) bool {
+ return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
+ }) {
+ cfg.Files = append(cfg.Files, gallery.File{
+ URI: file.URL,
+ Filename: filepath.Base(file.Path),
+ SHA256: file.SHA256,
+ })
+ }
+ }
+ if len(modelConfig.DownloadFiles) > 0 {
+ modelConfig.PredictionOptions = schema.PredictionOptions{
+ BasicModelRequest: schema.BasicModelRequest{
+ Model: modelConfig.DownloadFiles[0].Filename,
+ },
+ }
+ }
+ }
+
data, err := yaml.Marshal(modelConfig)
if err != nil {
return gallery.ModelConfig{}, err
}
- return gallery.ModelConfig{
- Name: name,
- Description: description,
- ConfigFile: string(data),
- Files: []gallery.File{
- {
- URI: details.URI,
- Filename: filepath.Base(details.URI),
- },
- },
- }, nil
+ cfg.ConfigFile = string(data)
+
+ return cfg, nil
}
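The quantization filter above lowercases both sides of the comparison, so repo filenames like model-Q4_K_M.gguf match the default q4_k_m preference. A self-contained illustration with invented filenames:

package main

import (
	"fmt"
	"slices"
	"strings"
)

func main() {
	quants := []string{"q4_k_m"} // default preference in this patch
	for _, path := range []string{"model-Q4_K_M.gguf", "model-Q8_0.gguf"} {
		matched := slices.ContainsFunc(quants, func(q string) bool {
			return strings.Contains(strings.ToLower(path), strings.ToLower(q))
		})
		fmt.Printf("%s matched=%v\n", path, matched) // true, then false
	}
}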
diff --git a/core/gallery/importers/llama-cpp_test.go b/core/gallery/importers/llama-cpp_test.go
index 19f25e0a1dc6..5a3c4d74f44d 100644
--- a/core/gallery/importers/llama-cpp_test.go
+++ b/core/gallery/importers/llama-cpp_test.go
@@ -2,6 +2,7 @@ package importers_test
import (
"encoding/json"
+ "fmt"
"github.com/mudler/LocalAI/core/gallery/importers"
. "github.com/onsi/ginkgo/v2"
@@ -80,11 +81,10 @@ var _ = Describe("LlamaCPPImporter", func() {
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("my-model.gguf"))
Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
- Expect(modelConfig.Files).To(HaveLen(1))
- Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"))
- Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"))
Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
- Expect(modelConfig.ConfigFile).To(ContainSubstring("model: my-model.gguf"))
+ Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
})
It("should import model config with custom name and description from preferences", func() {
@@ -99,7 +99,9 @@ var _ = Describe("LlamaCPPImporter", func() {
Expect(err).ToNot(HaveOccurred())
Expect(modelConfig.Name).To(Equal("custom-model"))
Expect(modelConfig.Description).To(Equal("Custom description"))
- Expect(modelConfig.Files).To(HaveLen(1))
+ Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
})
It("should handle invalid JSON preferences", func() {
@@ -121,8 +123,9 @@ var _ = Describe("LlamaCPPImporter", func() {
modelConfig, err := importer.Import(details)
Expect(err).ToNot(HaveOccurred())
- Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"))
- Expect(modelConfig.ConfigFile).To(ContainSubstring("model: model.gguf"))
+ Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/path/to/model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+ Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
})
})
})
diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index 68b26360c1b5..d18aa6cdb924 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -167,6 +167,22 @@
Custom description for the model. If empty, a default description will be generated.
+
+
+
+
+
+
+ Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).
+
+
@@ -424,7 +440,8 @@
commonPreferences: {
backend: '',
name: '',
- description: ''
+ description: '',
+ quantizations: ''
},
isSubmitting: false,
currentJobId: null,
@@ -487,6 +504,9 @@
if (this.commonPreferences.description && this.commonPreferences.description.trim()) {
prefsObj.description = this.commonPreferences.description.trim();
}
+ if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) {
+ prefsObj.quantizations = this.commonPreferences.quantizations.trim();
+ }
// Add custom preferences (can override common ones)
this.preferences.forEach(pref => {
diff --git a/pkg/huggingface-api/client.go b/pkg/huggingface-api/client.go
index 37fb52bb22c1..9b1959f1d857 100644
--- a/pkg/huggingface-api/client.go
+++ b/pkg/huggingface-api/client.go
@@ -51,6 +51,7 @@ type ModelFile struct {
Size int64
SHA256 string
IsReadme bool
+ URL string
}
// ModelDetails represents detailed information about a model
@@ -215,6 +216,7 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
}
// Process each file
+ baseURL := strings.TrimSuffix(c.baseURL, "/api/models")
for _, file := range files {
fileName := filepath.Base(file.Path)
isReadme := strings.Contains(strings.ToLower(fileName), "readme")
@@ -227,11 +229,16 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
sha256 = file.Oid
}
+ // Construct the full URL for the file
+ // Use /resolve/main/ for downloading files (handles LFS properly)
+ fileURL := fmt.Sprintf("%s/%s/resolve/main/%s", baseURL, repoID, file.Path)
+
modelFile := ModelFile{
Path: file.Path,
Size: file.Size,
SHA256: sha256,
IsReadme: isReadme,
+ URL: fileURL,
}
details.Files = append(details.Files, modelFile)
diff --git a/pkg/huggingface-api/client_test.go b/pkg/huggingface-api/client_test.go
index ffa35bb90a94..5ef5f7900930 100644
--- a/pkg/huggingface-api/client_test.go
+++ b/pkg/huggingface-api/client_test.go
@@ -1,6 +1,7 @@
package hfapi_test
import (
+ "fmt"
"net/http"
"net/http/httptest"
"strings"
@@ -461,6 +462,13 @@ var _ = Describe("HuggingFace API Client", func() {
Expect(details.ReadmeFile).ToNot(BeNil())
Expect(details.ReadmeFile.Path).To(Equal("README.md"))
Expect(details.ReadmeFile.IsReadme).To(BeTrue())
+
+ // Verify URLs are set for all files
+ baseURL := strings.TrimSuffix(server.URL, "/api/models")
+ for _, file := range details.Files {
+ expectedURL := fmt.Sprintf("%s/test/model/resolve/main/%s", baseURL, file.Path)
+ Expect(file.URL).To(Equal(expectedURL))
+ }
})
})
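Taken together, this patch normalizes a bare HF URI to a repo ID and derives a /resolve/main/ download URL per file. A worked example in Go using the repo from the DiscoverModelConfig spec above:

// Worked example of the normalization in importers.go plus the URL
// construction in pkg/huggingface-api; values come from the spec above.
uri := "huggingface://mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"
repoID := strings.ReplaceAll(uri, "huggingface://", "")
repoID = strings.ReplaceAll(repoID, "hf://", "")
repoID = strings.ReplaceAll(repoID, "https://huggingface.co/", "")
// repoID == "mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"

filePath := "localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"
fileURL := fmt.Sprintf("https://huggingface.co/%s/resolve/main/%s", repoID, filePath)
// fileURL equals the Files[0].URI asserted in the spec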
From c9f30fbc0f6e152e4c211fc5addb9174069e1046 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 18:26:43 +0100
Subject: [PATCH 14/16] feat(importer/llama.cpp): add support for mmproj files
Signed-off-by: Ettore Di Giacinto
---
core/gallery/importers/llama-cpp.go | 55 +++++++++++++++++++++++++++--
1 file changed, 52 insertions(+), 3 deletions(-)
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index d4c8ff591fdb..65bebf7d1966 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -69,11 +69,17 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
}
preferredQuantizations, _ := preferencesMap["quantizations"].(string)
- quants := []string{"q4_k_m"}
+ quants := []string{"q4_k_m", "q4_0", "q8_0", "f16"}
if preferredQuantizations != "" {
quants = strings.Split(preferredQuantizations, ",")
}
+ mmprojQuants, _ := preferencesMap["mmproj_quantizations"].(string)
+ mmprojQuantsList := []string{"fp16"}
+ if mmprojQuants != "" {
+ mmprojQuantsList = strings.Split(mmprojQuants, ",")
+ }
+
modelConfig := config.ModelConfig{
Name: name,
Description: description,
@@ -105,7 +111,11 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
},
}
} else if details.HuggingFace != nil {
+ lastMMProjFile := gallery.File{}
+ foundPreferredQuant := false
+
for _, file := range details.HuggingFace.Files {
+ // Collect the files that match the preferred quantizations
if slices.ContainsFunc(quants, func(quant string) bool {
return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
}) {
@@ -115,14 +125,53 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
SHA256: file.SHA256,
})
}
+ // Track mmproj files and add those matching the preferred mmproj quantizations
+ if strings.Contains(strings.ToLower(file.Path), "mmproj") {
+ lastMMProjFile = gallery.File{
+ URI: file.URL,
+ Filename: filepath.Base(file.Path),
+ SHA256: file.SHA256,
+ }
+ if slices.ContainsFunc(mmprojQuantsList, func(quant string) bool {
+ return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
+ }) {
+ foundPreferredQuant = true
+ cfg.Files = append(cfg.Files, lastMMProjFile)
+ }
+ }
}
- if len(modelConfig.DownloadFiles) > 0 {
+
+ if !foundPreferredQuant && lastMMProjFile.URI != "" {
+ cfg.Files = append(cfg.Files, lastMMProjFile)
modelConfig.PredictionOptions = schema.PredictionOptions{
BasicModelRequest: schema.BasicModelRequest{
- Model: modelConfig.DownloadFiles[0].Filename,
+ Model: lastMMProjFile.Filename,
},
}
}
+
+ // Find first mmproj file
+ for _, file := range cfg.Files {
+ if !strings.Contains(strings.ToLower(file.Filename), "mmproj") {
+ continue
+ }
+ modelConfig.MMProj = file.Filename
+ break
+ }
+
+ // Find first non-mmproj file
+ for _, file := range cfg.Files {
+ if strings.Contains(strings.ToLower(file.Filename), "mmproj") {
+ continue
+ }
+ modelConfig.PredictionOptions = schema.PredictionOptions{
+ BasicModelRequest: schema.BasicModelRequest{
+ Model: file.Filename,
+ },
+ }
+ break
+ }
+
}
data, err := yaml.Marshal(modelConfig)
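Condensed, the mmproj handling above prefers a projector file that matches the preferred mmproj quantizations and otherwise falls back to the last mmproj file seen. A sketch under that reading; matchesQuant, files, and selected are stand-ins for the real helpers:

// Sketch of the mmproj fallback; helper names are invented.
var lastMMProj gallery.File
foundPreferred := false
for _, f := range files {
	if !strings.Contains(strings.ToLower(f.Filename), "mmproj") {
		continue // only projector files are considered here
	}
	lastMMProj = f
	if matchesQuant(f.Filename, mmprojQuantsList) { // default list: fp16
		foundPreferred = true
		selected = append(selected, f)
	}
}
if !foundPreferred && lastMMProj.URI != "" {
	selected = append(selected, lastMMProj) // last mmproj seen is the fallback
}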
From 0d02850f3ddbec347992a09267536652ad6535a3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 18:33:01 +0100
Subject: [PATCH 15/16] add mmproj quants to common preferences
Signed-off-by: Ettore Di Giacinto
---
core/http/views/model-editor.html | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index d18aa6cdb924..d5b5fce764d7 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -183,6 +183,22 @@
Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).
+
+
+
+
+
+
+ Preferred MMProj quantizations (comma-separated). Examples: fp16, fp32. Leave empty to use default (fp16).
+
+
@@ -441,7 +457,8 @@
backend: '',
name: '',
description: '',
- quantizations: ''
+ quantizations: '',
+ mmproj_quantizations: ''
},
isSubmitting: false,
currentJobId: null,
@@ -507,6 +524,9 @@
if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) {
prefsObj.quantizations = this.commonPreferences.quantizations.trim();
}
+ if (this.commonPreferences.mmproj_quantizations && this.commonPreferences.mmproj_quantizations.trim()) {
+ prefsObj.mmproj_quantizations = this.commonPreferences.mmproj_quantizations.trim();
+ }
// Add custom preferences (can override common ones)
this.preferences.forEach(pref => {
From 9366052eb955f7f12446eda762f638a2eaee61d3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Wed, 12 Nov 2025 19:24:49 +0100
Subject: [PATCH 16/16] Fix VLM usage in tokenizer mode with llama.cpp
Signed-off-by: Ettore Di Giacinto
---
backend/cpp/llama-cpp/grpc-server.cpp | 134 ++++++++++++++++++++++++--
1 file changed, 124 insertions(+), 10 deletions(-)
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 790032d60316..17087fc060d0 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -590,17 +590,71 @@ class BackendServiceImpl final : public backend::Backend::Service {
// Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse
json body_json;
json messages_json = json::array();
+
+ // Find the last user message index to attach images/audio to
+ int last_user_msg_idx = -1;
+ for (int i = request->messages_size() - 1; i >= 0; i--) {
+ if (request->messages(i).role() == "user") {
+ last_user_msg_idx = i;
+ break;
+ }
+ }
+
for (int i = 0; i < request->messages_size(); i++) {
const auto& msg = request->messages(i);
json msg_json;
msg_json["role"] = msg.role();
+ bool is_last_user_msg = (i == last_user_msg_idx);
+ bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
+
// Handle content - can be string, null, or array
// For multimodal content, we'll embed images/audio from separate fields
if (!msg.content().empty()) {
- msg_json["content"] = msg.content();
- } else if (request->images_size() > 0 || request->audios_size() > 0) {
- // If no content but has images/audio, create content array
+ // Try to parse content as JSON to see if it's already an array
+ json content_val;
+ try {
+ content_val = json::parse(msg.content());
+ } catch (const json::parse_error&) {
+ // Not JSON, treat as plain string
+ content_val = msg.content();
+ }
+
+ // If content is a string and this is the last user message with images/audio, combine them
+ if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
+ json content_array = json::array();
+ // Add text first
+ content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
+ // Add images
+ if (request->images_size() > 0) {
+ for (int j = 0; j < request->images_size(); j++) {
+ json image_chunk;
+ image_chunk["type"] = "image_url";
+ json image_url;
+ image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+ image_chunk["image_url"] = image_url;
+ content_array.push_back(image_chunk);
+ }
+ }
+ // Add audios
+ if (request->audios_size() > 0) {
+ for (int j = 0; j < request->audios_size(); j++) {
+ json audio_chunk;
+ audio_chunk["type"] = "input_audio";
+ json input_audio;
+ input_audio["data"] = request->audios(j);
+ input_audio["format"] = "wav"; // default, could be made configurable
+ audio_chunk["input_audio"] = input_audio;
+ content_array.push_back(audio_chunk);
+ }
+ }
+ msg_json["content"] = content_array;
+ } else {
+ // Use content as-is (already array or not last user message)
+ msg_json["content"] = content_val;
+ }
+ } else if (is_last_user_msg && has_images_or_audio) {
+ // If no content but this is the last user message with images/audio, create content array
json content_array = json::array();
if (request->images_size() > 0) {
for (int j = 0; j < request->images_size(); j++) {
@@ -718,6 +772,9 @@ class BackendServiceImpl final : public backend::Backend::Service {
// Create parser options with current chat_templates to ensure tmpls is not null
oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt;
parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates
+ // Update allow_image and allow_audio based on current mctx state
+ parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
+ parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Extract the prompt from parsed data
@@ -758,7 +815,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
// If not using chat templates, extract files from image_data/audio_data fields
// (If using chat templates, files were already extracted by oaicompat_chat_params_parse)
- //if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
+ if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
const auto &images_data = data.find("image_data");
if (images_data != data.end() && images_data->is_array())
{
@@ -778,7 +835,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
files.push_back(decoded_data);
}
}
- // }
+ }
const bool has_mtmd = ctx_server.mctx != nullptr;
@@ -917,17 +974,71 @@ class BackendServiceImpl final : public backend::Backend::Service {
// Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse
json body_json;
json messages_json = json::array();
+
+ // Find the last user message index to attach images/audio to
+ int last_user_msg_idx = -1;
+ for (int i = request->messages_size() - 1; i >= 0; i--) {
+ if (request->messages(i).role() == "user") {
+ last_user_msg_idx = i;
+ break;
+ }
+ }
+
for (int i = 0; i < request->messages_size(); i++) {
const auto& msg = request->messages(i);
json msg_json;
msg_json["role"] = msg.role();
+ bool is_last_user_msg = (i == last_user_msg_idx);
+ bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
+
// Handle content - can be string, null, or array
// For multimodal content, we'll embed images/audio from separate fields
if (!msg.content().empty()) {
- msg_json["content"] = msg.content();
- } else if (request->images_size() > 0 || request->audios_size() > 0) {
- // If no content but has images/audio, create content array
+ // Try to parse content as JSON to see if it's already an array
+ json content_val;
+ try {
+ content_val = json::parse(msg.content());
+ } catch (const json::parse_error&) {
+ // Not JSON, treat as plain string
+ content_val = msg.content();
+ }
+
+ // If content is a string and this is the last user message with images/audio, combine them
+ if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
+ json content_array = json::array();
+ // Add text first
+ content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
+ // Add images
+ if (request->images_size() > 0) {
+ for (int j = 0; j < request->images_size(); j++) {
+ json image_chunk;
+ image_chunk["type"] = "image_url";
+ json image_url;
+ image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+ image_chunk["image_url"] = image_url;
+ content_array.push_back(image_chunk);
+ }
+ }
+ // Add audios
+ if (request->audios_size() > 0) {
+ for (int j = 0; j < request->audios_size(); j++) {
+ json audio_chunk;
+ audio_chunk["type"] = "input_audio";
+ json input_audio;
+ input_audio["data"] = request->audios(j);
+ input_audio["format"] = "wav"; // default, could be made configurable
+ audio_chunk["input_audio"] = input_audio;
+ content_array.push_back(audio_chunk);
+ }
+ }
+ msg_json["content"] = content_array;
+ } else {
+ // Use content as-is (already array or not last user message)
+ msg_json["content"] = content_val;
+ }
+ } else if (is_last_user_msg && has_images_or_audio) {
+ // If no content but this is the last user message with images/audio, create content array
json content_array = json::array();
if (request->images_size() > 0) {
for (int j = 0; j < request->images_size(); j++) {
@@ -1048,6 +1159,9 @@ class BackendServiceImpl final : public backend::Backend::Service {
// Create parser options with current chat_templates to ensure tmpls is not null
oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt;
parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates
+ // Update allow_image and allow_audio based on current mctx state
+ parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
+ parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
// Extract the prompt from parsed data
@@ -1088,7 +1202,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
// If not using chat templates, extract files from image_data/audio_data fields
// (If using chat templates, files were already extracted by oaicompat_chat_params_parse)
- // if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
+ if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
const auto &images_data = data.find("image_data");
if (images_data != data.end() && images_data->is_array())
{
@@ -1110,7 +1224,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
files.push_back(decoded_data);
}
}
- // }
+ }
// process files
const bool has_mtmd = ctx_server.mctx != nullptr;
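For reference, the content array the handler now assembles for the last user message follows the OpenAI chat format. A Go sketch of the resulting JSON for one text part, one image, and one audio clip; the base64 payloads are placeholders:

// Sketch of the multimodal message handed to oaicompat_chat_params_parse.
msg := map[string]any{
	"role": "user",
	"content": []map[string]any{
		{"type": "text", "text": "What is in this picture?"},
		{"type": "image_url", "image_url": map[string]any{
			"url": "data:image/jpeg;base64,<BASE64_JPEG>",
		}},
		{"type": "input_audio", "input_audio": map[string]any{
			"data":   "<BASE64_WAV>",
			"format": "wav", // default format used by the handler
		}},
	},
}
body, _ := json.Marshal(msg)
_ = body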