From c51124c710e1aa134d0ff10afad6511559d73022 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 11 Nov 2025 18:48:33 +0100
Subject: [PATCH 01/16] feat: initial hook to install elements directly

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/endpoints/localai/backend.go      |  4 +-
 core/http/endpoints/localai/gallery.go      |  4 +-
 core/http/endpoints/localai/import_model.go | 43 +++++++++++++++++++++
 core/http/routes/localai.go                 |  3 ++
 core/http/routes/ui_api.go                  |  8 ++--
 core/p2p/sync.go                            |  2 +-
 core/schema/localai.go                      |  6 +++
 core/services/backends.go                   |  2 +-
 core/services/gallery.go                    |  8 ++--
 core/services/models.go                     | 35 ++++++++++-------
 core/services/operation.go                  |  8 +++-
 11 files changed, 92 insertions(+), 31 deletions(-)

diff --git a/core/http/endpoints/localai/backend.go b/core/http/endpoints/localai/backend.go
index f4d88ffec154..e2f5f5635a21 100644
--- a/core/http/endpoints/localai/backend.go
+++ b/core/http/endpoints/localai/backend.go
@@ -76,7 +76,7 @@ func (mgs *BackendEndpointService) ApplyBackendEndpoint() func(c *fiber.Ctx) err
 		if err != nil {
 			return err
 		}
-		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
+		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
 			ID:                 uuid.String(),
 			GalleryElementName: input.ID,
 			Galleries:          mgs.galleries,
@@ -95,7 +95,7 @@ func (mgs *BackendEndpointService) DeleteBackendEndpoint() func(c *fiber.Ctx) er
 	return func(c *fiber.Ctx) error {
 		backendName := c.Params("name")
 
-		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend]{
+		mgs.backendApplier.BackendGalleryChannel <- services.GalleryOp[gallery.GalleryBackend, any]{
 			Delete:             true,
 			GalleryElementName: backendName,
 			Galleries:          mgs.galleries,
diff --git a/core/http/endpoints/localai/gallery.go b/core/http/endpoints/localai/gallery.go
index 8948fde34250..d8079bcc5128 100644
--- a/core/http/endpoints/localai/gallery.go
+++ b/core/http/endpoints/localai/gallery.go
@@ -77,7 +77,7 @@ func (mgs *ModelGalleryEndpointService) ApplyModelGalleryEndpoint() func(c *fibe
 		if err != nil {
 			return err
 		}
-		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			Req:                input.GalleryModel,
 			ID:                 uuid.String(),
 			GalleryElementName: input.ID,
@@ -98,7 +98,7 @@ func (mgs *ModelGalleryEndpointService) DeleteModelGalleryEndpoint() func(c *fib
 	return func(c *fiber.Ctx) error {
 		modelName := c.Params("name")
 
-		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+		mgs.galleryApplier.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			Delete:             true,
 			GalleryElementName: modelName,
 		}
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index c6ff6bb962ea..e579642ed10e 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -2,16 +2,59 @@ package localai
 
 import (
 	"encoding/json"
+	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 
 	"github.com/gofiber/fiber/v2"
+	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	httpUtils "github.com/mudler/LocalAI/core/http/utils"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/pkg/utils"
+
 	"gopkg.in/yaml.v3"
 )
 
+// ImportModelURIEndpoint returns a handler that parses a schema.ImportModelRequest and
+// queues a direct model installation on the gallery service. The response carries the
+// job ID together with the URL where the job status can be retrieved.
+func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler {
+	return func(c *fiber.Ctx) error {
+		input := new(schema.ImportModelRequest)
+		if err := c.BodyParser(input); err != nil {
+			return err
+		}
+
+		// The default model settings will be derived from the input and handed to the gallery service.
+		// TODO: the parsed input is not used yet; the configuration below is placeholder data.
+		modelConfig := gallery.ModelConfig{
+			Name: "imported-model-name",
+		}
+
+		jobID, err := uuid.NewUUID()
+		if err != nil {
+			return err
+		}
+		galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
+			Req: gallery.GalleryModel{
+				Overrides: map[string]any{},
+			},
+			ID:               jobID.String(),
+			GalleryElement:   &modelConfig,
+			BackendGalleries: appConfig.BackendGalleries,
+		}
+
+		return c.JSON(schema.GalleryResponse{
+			ID:        jobID.String(),
+			StatusURL: fmt.Sprintf("%smodels/jobs/%s", httpUtils.BaseURL(c), jobID.String()),
+		})
+	}
+}
+
 // ImportModelEndpoint handles creating new model configurations
 func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig) fiber.Handler {
 	return func(c *fiber.Ctx) error {
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 298f5fd787b3..119a02b55311 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -57,6 +57,9 @@ func RegisterLocalAIRoutes(router *fiber.App,
 		// Custom model import endpoint
 		router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig))
 
+		// URI model import endpoint
+		router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService))
+
 		// Custom model edit endpoint
 		router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig))
 
diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go
index 1fb016825c2b..d60809512d38 100644
--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -247,7 +247,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
 		uid := id.String()
 		opcache.Set(galleryID, uid)
 
-		op := services.GalleryOp[gallery.GalleryModel]{
+		op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			ID:                 uid,
 			GalleryElementName: galleryID,
 			Galleries:          appConfig.Galleries,
@@ -290,7 +290,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
 
 		opcache.Set(galleryID, uid)
 
-		op := services.GalleryOp[gallery.GalleryModel]{
+		op := services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			ID:                 uid,
 			Delete:             true,
 			GalleryElementName: galleryName,
@@ -525,7 +525,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
 		uid := id.String()
 		opcache.Set(backendID, uid)
 
-		op := services.GalleryOp[gallery.GalleryBackend]{
+		op := services.GalleryOp[gallery.GalleryBackend, any]{
 			ID:                 uid,
 			GalleryElementName: backendID,
 			Galleries:          appConfig.BackendGalleries,
@@ -567,7 +567,7 @@ func RegisterUIAPIRoutes(app *fiber.App, cl *config.ModelConfigLoader, appConfig
 
 		opcache.Set(backendID, uid)
 
-		op := services.GalleryOp[gallery.GalleryBackend]{
+		op := services.GalleryOp[gallery.GalleryBackend, any]{
 			ID:                 uid,
 			Delete:             true,
 			GalleryElementName: backendName,
diff --git a/core/p2p/sync.go b/core/p2p/sync.go
index f9be422a6bf5..44efe93d8224 100644
--- a/core/p2p/sync.go
+++ b/core/p2p/sync.go
@@ -73,7 +73,7 @@ func syncState(ctx context.Context, n *node.Node, app *application.Application)
 			continue
 		}
 
-		app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel]{
+		app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			ID:                 uuid.String(),
 			GalleryElementName: model,
 			Galleries:          app.ApplicationConfig().Galleries,
diff --git a/core/schema/localai.go b/core/schema/localai.go
index 0f9460cf4ae1..5eb56d91bf5d 100644
--- a/core/schema/localai.go
+++ b/core/schema/localai.go
@@ -1,6 +1,7 @@
 package schema
 
 import (
+	"encoding/json"
 	"time"
 
 	gopsutil "github.com/shirou/gopsutil/v3/process"
@@ -157,3 +158,8 @@ type Detection struct {
 	Height    float32 `json:"height"`
 	ClassName string  `json:"class_name"`
 }
+
+type ImportModelRequest struct {
+	URI         string          `json:"uri"`                   // URI of the model to import
+	Preferences json.RawMessage `json:"preferences,omitempty"` // optional; kept as raw, undecoded JSON
+}
diff --git a/core/services/backends.go b/core/services/backends.go
index 7295734dfcbc..7ffca2ac1030 100644
--- a/core/services/backends.go
+++ b/core/services/backends.go
@@ -8,7 +8,7 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend], systemState *system.SystemState) error {
+func (g *GalleryService) backendHandler(op *GalleryOp[gallery.GalleryBackend, any], systemState *system.SystemState) error {
 	utils.ResetDownloadTimers()
 	g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
 
diff --git a/core/services/gallery.go b/core/services/gallery.go
index 4833aa334ba2..2290c450d9ed 100644
--- a/core/services/gallery.go
+++ b/core/services/gallery.go
@@ -14,8 +14,8 @@ import (
 type GalleryService struct {
 	appConfig *config.ApplicationConfig
 	sync.Mutex
-	ModelGalleryChannel   chan GalleryOp[gallery.GalleryModel]
-	BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend]
+	ModelGalleryChannel   chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]
+	BackendGalleryChannel chan GalleryOp[gallery.GalleryBackend, any]
 
 	modelLoader *model.ModelLoader
 	statuses    map[string]*GalleryOpStatus
@@ -24,8 +24,8 @@ type GalleryService struct {
 func NewGalleryService(appConfig *config.ApplicationConfig, ml *model.ModelLoader) *GalleryService {
 	return &GalleryService{
 		appConfig:             appConfig,
-		ModelGalleryChannel:   make(chan GalleryOp[gallery.GalleryModel]),
-		BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend]),
+		ModelGalleryChannel:   make(chan GalleryOp[gallery.GalleryModel, gallery.ModelConfig]),
+		BackendGalleryChannel: make(chan GalleryOp[gallery.GalleryBackend, any]),
 		modelLoader:           ml,
 		statuses:              make(map[string]*GalleryOpStatus),
 	}
diff --git a/core/services/models.go b/core/services/models.go
index 43696f455a09..b22999f6b977 100644
--- a/core/services/models.go
+++ b/core/services/models.go
@@ -9,10 +9,11 @@ import (
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/system"
 	"github.com/mudler/LocalAI/pkg/utils"
+	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v2"
 )
 
-func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
+func (g *GalleryService) modelHandler(op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig], cl *config.ModelConfigLoader, systemState *system.SystemState) error {
 	utils.ResetDownloadTimers()
 
 	g.UpdateStatus(op.ID, &GalleryOpStatus{Message: "processing", Progress: 0})
@@ -118,28 +119,32 @@ func ApplyGalleryFromString(systemState *system.SystemState, modelLoader *model.
 
 // processModelOperation handles the installation or deletion of a model
 func processModelOperation(
-	op *GalleryOp[gallery.GalleryModel],
+	op *GalleryOp[gallery.GalleryModel, gallery.ModelConfig],
 	systemState *system.SystemState,
 	modelLoader *model.ModelLoader,
 	enforcePredownloadScans bool,
 	automaticallyInstallBackend bool,
 	progressCallback func(string, string, string, float64),
 ) error {
-	// delete a model
-	if op.Delete {
+	switch {
+	case op.Delete:
 		return gallery.DeleteModelFromSystem(systemState, op.GalleryElementName)
-	}
-
-	// if the request contains a gallery name, we apply the gallery from the gallery list
-	if op.GalleryElementName != "" {
+	case op.GalleryElement != nil:
+		installedModel, err := gallery.InstallModel(systemState, op.GalleryElement.Name, op.GalleryElement, op.Req.Overrides, progressCallback, enforcePredownloadScans)
+		if err != nil {
+			// Stop here: installedModel is only meaningful when the installation succeeded.
+			return err
+		}
+		if automaticallyInstallBackend && installedModel.Backend != "" {
+			log.Debug().Msgf("Installing backend %q", installedModel.Backend)
+			if err := gallery.InstallBackendFromGallery(op.BackendGalleries, systemState, modelLoader, installedModel.Backend, progressCallback, false); err != nil {
+				return err
+			}
+		}
+		return nil
+	case op.GalleryElementName != "":
 		return gallery.InstallModelFromGallery(op.Galleries, op.BackendGalleries, systemState, modelLoader, op.GalleryElementName, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend)
-		// } else if op.ConfigURL != "" {
-		// 	err := startup.InstallModels(op.Galleries, modelPath, enforcePredownloadScans, progressCallback, op.ConfigURL)
-		// 	if err != nil {
-		// 		return err
-		// 	}
-		// 	return cl.Preload(modelPath)
-	} else {
+	default:
 		return installModelFromRemoteConfig(systemState, modelLoader, op.Req, progressCallback, enforcePredownloadScans, automaticallyInstallBackend, op.BackendGalleries)
 	}
 }
diff --git a/core/services/operation.go b/core/services/operation.go
index 962921807f9d..d0ba76ceb1a9 100644
--- a/core/services/operation.go
+++ b/core/services/operation.go
@@ -5,12 +5,16 @@ import (
 	"github.com/mudler/LocalAI/pkg/xsync"
 )
 
-type GalleryOp[T any] struct {
+type GalleryOp[T any, E any] struct {
 	ID                 string
 	GalleryElementName string
 	Delete             bool
 
-	Req              T
+	Req T
+
+	// GalleryElement, when non-nil, is installed directly rather than being looked up by GalleryElementName.
+	GalleryElement *E
+
 	Galleries        []config.Gallery
 	BackendGalleries []config.Gallery
 }

From 1c5d78d3dadcee40e291179dfd53516c3cbf3772 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 11 Nov 2025 19:04:28 +0100
Subject: [PATCH 02/16] WIP: ui changes

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/views/model-editor.html        | 669 +++++++++++++++--------
 core/http/views/partials/inprogress.html |   9 +
 2 files changed, 465 insertions(+), 213 deletions(-)

diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index d5cbc550b7c4..f8db7cf66fae 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -3,9 +3,10 @@
 {{template "views/partials/head" .}}
 
 <body class="bg-[#101827] text-[#E5E7EB]">
-<div class="flex flex-col min-h-screen">
+<div class="flex flex-col min-h-screen" x-data="importModel()" x-init="init()">
 
     {{template "views/partials/navbar" .}}
+    {{template "views/partials/inprogress" .}}
 
     <div class="container mx-auto px-4 py-8 flex-grow">
         <!-- Hero Header -->
@@ -24,19 +25,44 @@ <h1 class="text-3xl md:text-4xl font-bold text-white mb-2">
                                 {{if .ModelName}}Edit Model: {{.ModelName}}{{else}}Import New Model{{end}}
                             </span>
                         </h1>
-                        <p class="text-lg text-gray-300 font-light">Configure your model settings using YAML</p>
+                        <p class="text-lg text-gray-300 font-light" x-text="isAdvancedMode ? 'Configure your model settings using YAML' : 'Import a model from URI with preferences'"></p>
                     </div>
                     <div class="flex gap-3">
-                        <button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
-                            <i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
-                            <span>Validate</span>
-                            <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
-                        </button>
-                        <button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
-                            <i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
-                            <span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
-                            <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
-                        </button>
+                        <!-- Mode Toggle (only show when not in edit mode) -->
+                        <template x-if="!isEditMode">
+                            <button @click="toggleMode()" 
+                                    class="group relative inline-flex items-center bg-gradient-to-r from-gray-600 to-gray-700 hover:from-gray-700 hover:to-gray-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl">
+                                <i class="fas group-hover:animate-pulse" :class="isAdvancedMode ? 'fa-magic mr-2' : 'fa-code mr-2'"></i>
+                                <span x-text="isAdvancedMode ? 'Simple Mode' : 'Advanced Mode'"></span>
+                                <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                            </button>
+                        </template>
+                        <!-- Advanced Mode Buttons -->
+                        <template x-if="isAdvancedMode">
+                            <div class="flex gap-3">
+                                <button id="validateBtn" class="group relative inline-flex items-center bg-gradient-to-r from-blue-600 to-blue-700 hover:from-blue-700 hover:to-blue-800 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-blue-500/25">
+                                    <i class="fas fa-check mr-2 group-hover:animate-pulse"></i>
+                                    <span>Validate</span>
+                                    <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                                </button>
+                                <button id="saveBtn" class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
+                                    <i class="fas fa-save mr-2 group-hover:animate-pulse"></i>
+                                    <span>{{if .ModelName}}Update{{else}}Create{{end}}</span>
+                                    <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                                </button>
+                            </div>
+                        </template>
+                        <!-- Simple Mode Button -->
+                        <template x-if="!isAdvancedMode && !isEditMode">
+                            <button @click="submitImport()" 
+                                    :disabled="isSubmitting || !importUri.trim()"
+                                    :class="(isSubmitting || !importUri.trim()) ? 'opacity-50 cursor-not-allowed' : ''"
+                                    class="group relative inline-flex items-center bg-gradient-to-r from-green-600 to-emerald-600 hover:from-green-700 hover:to-emerald-700 text-white py-3 px-6 rounded-xl font-semibold transition-all duration-300 ease-in-out transform hover:scale-105 hover:shadow-xl hover:shadow-green-500/25">
+                                <i class="fas group-hover:animate-pulse" :class="isSubmitting ? 'fa-spinner fa-spin mr-2' : 'fa-upload mr-2'"></i>
+                                <span x-text="isSubmitting ? 'Importing...' : 'Import Model'"></span>
+                                <div class="absolute inset-0 rounded-xl bg-white/10 opacity-0 group-hover:opacity-100 transition-opacity"></div>
+                            </button>
+                        </template>
                     </div>
                 </div>
             </div>
@@ -45,8 +71,88 @@ <h1 class="text-3xl md:text-4xl font-bold text-white mb-2">
         <!-- Alert Messages -->
         <div id="alertContainer" class="mb-6"></div>
 
-        <!-- YAML Editor Panel -->
-        <div class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
+        <!-- Simple Import Mode -->
+        <div x-show="!isAdvancedMode && !isEditMode" 
+             x-transition:enter="transition ease-out duration-300"
+             x-transition:enter-start="opacity-0 transform translate-y-4"
+             x-transition:enter-end="opacity-100 transform translate-y-0"
+             class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm p-8">
+            <div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-green-500/5 to-emerald-500/5"></div>
+            
+            <div class="relative space-y-6">
+                <h2 class="text-2xl font-semibold text-white flex items-center gap-3 mb-6">
+                    <div class="w-10 h-10 rounded-lg bg-green-500/20 flex items-center justify-center">
+                        <i class="fas fa-link text-green-400"></i>
+                    </div>
+                    Import from URI
+                </h2>
+
+                <!-- URI Input -->
+                <div>
+                    <label class="block text-sm font-medium text-gray-300 mb-2">
+                        <i class="fas fa-link mr-2"></i>Model URI
+                    </label>
+                    <input 
+                        x-model="importUri"
+                        type="text" 
+                        placeholder="https://example.com/model.gguf or file:///path/to/model.gguf"
+                        class="w-full px-4 py-3 bg-gray-900/90 border border-gray-700/70 rounded-xl text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                        :disabled="isSubmitting">
+                    <p class="mt-2 text-xs text-gray-400">
+                        Enter the URI or path to the model file you want to import
+                    </p>
+                </div>
+
+                <!-- Preferences Section -->
+                <div>
+                    <div class="flex items-center justify-between mb-4">
+                        <label class="block text-sm font-medium text-gray-300">
+                            <i class="fas fa-cog mr-2"></i>Preferences (Optional)
+                        </label>
+                        <button @click="addPreference()" 
+                                :disabled="isSubmitting"
+                                class="text-sm px-3 py-1.5 rounded-lg bg-green-600/20 hover:bg-green-600/30 text-green-300 border border-green-500/30 transition-all">
+                            <i class="fas fa-plus mr-1"></i>Add Preference
+                        </button>
+                    </div>
+                    
+                    <div class="space-y-3" x-show="preferences.length > 0">
+                        <template x-for="(pref, index) in preferences" :key="index">
+                            <div class="flex gap-3 items-center">
+                                <input 
+                                    x-model="pref.key"
+                                    type="text" 
+                                    placeholder="Key"
+                                    class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                    :disabled="isSubmitting">
+                                <span class="text-gray-400">:</span>
+                                <input 
+                                    x-model="pref.value"
+                                    type="text" 
+                                    placeholder="Value"
+                                    class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                    :disabled="isSubmitting">
+                                <button @click="removePreference(index)" 
+                                        :disabled="isSubmitting"
+                                        class="px-3 py-2 rounded-lg bg-red-600/20 hover:bg-red-600/30 text-red-300 border border-red-500/30 transition-all">
+                                    <i class="fas fa-trash"></i>
+                                </button>
+                            </div>
+                        </template>
+                    </div>
+                    <p class="mt-2 text-xs text-gray-400">
+                        Add key-value pairs to customize the import process
+                    </p>
+                </div>
+            </div>
+        </div>
+
+        <!-- Advanced YAML Editor Panel -->
+        <div x-show="isAdvancedMode || isEditMode" 
+             x-transition:enter="transition ease-out duration-300"
+             x-transition:enter-start="opacity-0 transform translate-y-4"
+             x-transition:enter-end="opacity-100 transform translate-y-0"
+             class="relative bg-gradient-to-br from-gray-800/90 to-gray-900/90 border border-gray-700/50 rounded-2xl overflow-hidden shadow-xl backdrop-blur-sm h-[calc(100vh-250px)]">
             <div class="absolute inset-0 rounded-2xl bg-gradient-to-br from-fuchsia-500/5 to-purple-500/5"></div>
             
             <div class="relative sticky top-0 bg-gray-800/95 border-b border-gray-700/50 p-6 flex items-center justify-between z-10 backdrop-blur-sm">
@@ -144,22 +250,22 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
 }
 
 /* Enhanced YAML Syntax Highlighting */
-.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for YAML keys */
-.cm-string { color: #10b981 !important; } /* Emerald for strings */
-.cm-number { color: #f59e0b !important; } /* Amber for numbers */
-.cm-comment { color: #6b7280 !important; font-style: italic !important; } /* Gray for comments */
-.cm-property { color: #ec4899 !important; } /* Pink for properties */
-.cm-operator { color: #ef4444 !important; } /* Red for operators */
-.cm-variable { color: #06b6d4 !important; } /* Cyan for variables */
-.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; } /* Purple for tags */
-.cm-attribute { color: #f59e0b !important; } /* Amber for attributes */
-.cm-def { color: #ec4899 !important; font-weight: 600 !important; } /* Pink for definitions */
-.cm-bracket { color: #d1d5db !important; } /* Light gray for brackets */
-.cm-punctuation { color: #d1d5db !important; } /* Light gray for punctuation */
-.cm-quote { color: #10b981 !important; } /* Emerald for quotes */
-.cm-meta { color: #6b7280 !important; } /* Gray for meta */
-.cm-builtin { color: #f472b6 !important; } /* Pink for builtins */
-.cm-atom { color: #f59e0b !important; } /* Amber for atoms like true/false/null */
+.cm-keyword { color: #8b5cf6 !important; font-weight: 600 !important; }
+.cm-string { color: #10b981 !important; }
+.cm-number { color: #f59e0b !important; }
+.cm-comment { color: #6b7280 !important; font-style: italic !important; }
+.cm-property { color: #ec4899 !important; }
+.cm-operator { color: #ef4444 !important; }
+.cm-variable { color: #06b6d4 !important; }
+.cm-tag { color: #8b5cf6 !important; font-weight: 600 !important; }
+.cm-attribute { color: #f59e0b !important; }
+.cm-def { color: #ec4899 !important; font-weight: 600 !important; }
+.cm-bracket { color: #d1d5db !important; }
+.cm-punctuation { color: #d1d5db !important; }
+.cm-quote { color: #10b981 !important; }
+.cm-meta { color: #6b7280 !important; }
+.cm-builtin { color: #f472b6 !important; }
+.cm-atom { color: #f59e0b !important; }
 
 /* Enhanced scrollbar styling */
 .CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler {
@@ -242,22 +348,195 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
 </style>
 
 <script>
-class ModelEditor {
-    constructor() {
-        this.modelName = '{{.ModelName}}';
-        this.isEditMode = !!this.modelName;
-        this.yamlEditor = null;
+function importModel() {
+    return {
+        isAdvancedMode: false,
+        isEditMode: {{if .ModelName}}true{{else}}false{{end}},
+        importUri: '',
+        preferences: [],
+        isSubmitting: false,
+        currentJobId: null,
+        jobPollInterval: null,
+        yamlEditor: null,
+        modelEditor: null,
         
-        this.init();
-    }
-
-    init() {
-        this.initializeCodeMirror();
-        this.bindEvents();
-    }
-
-    getDefaultConfig() {
-        return `# Model Configuration
+        init() {
+            // Editing an existing model always uses the advanced (YAML) view.
+            if (this.isEditMode) {
+                this.isAdvancedMode = true;
+            }
+            // Create the YAML editor and bind its buttons when starting in advanced mode.
+            // Deferred with $nextTick, presumably so the editor container is rendered first.
+            if (this.isAdvancedMode || this.isEditMode) {
+                this.$nextTick(() => {
+                    this.initializeCodeMirror();
+                    this.bindAdvancedEvents();
+                });
+            }
+        },
+        
+        toggleMode() { // Toggle the advanced (YAML) view on or off.
+            this.isAdvancedMode = !this.isAdvancedMode;
+            if (this.isAdvancedMode) { // the editor is only set up when entering advanced mode
+                this.$nextTick(() => { // deferred, presumably until the advanced view is rendered
+                    this.initializeCodeMirror(); // returns early when the editor already exists
+                    this.bindAdvancedEvents();
+                });
+            }
+        },
+        
+        addPreference() { // Append an empty key/value row to the preferences list.
+            this.preferences.push({ key: '', value: '' });
+        },
+        
+        removePreference(index) { // Remove the preference row at the given index.
+            this.preferences.splice(index, 1);
+        },
+        
+        async submitImport() { // POST the trimmed URI and preferences to /models/import-uri, then poll the returned job.
+            if (!this.importUri.trim()) {
+                this.showAlert('error', 'Please enter a model URI');
+                return;
+            }
+            
+            this.isSubmitting = true; // stays true on success; startJobPolling() resets it when the job ends
+            
+            try {
+                // Collect the preference rows into a plain object, skipping rows with an empty key or value
+                const prefsObj = {};
+                this.preferences.forEach(pref => {
+                    if (pref.key && pref.value) {
+                        prefsObj[pref.key] = pref.value;
+                    }
+                });
+                // Send null rather than an empty object when no preference was filled in
+                const requestBody = {
+                    uri: this.importUri.trim(),
+                    preferences: Object.keys(prefsObj).length > 0 ? prefsObj : null
+                };
+                
+                const response = await fetch('/models/import-uri', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify(requestBody)
+                });
+                // On failure use the error field of the JSON body, or a generic message if the body is not JSON
+                if (!response.ok) {
+                    const error = await response.json().catch(() => ({ error: 'Failed to start import' }));
+                    throw new Error(error.error || 'Failed to start import');
+                }
+                
+                const result = await response.json();
+                // The job ID is read from the uuid field, with ID accepted as a fallback
+                if (result.uuid) {
+                    this.currentJobId = result.uuid;
+                    this.showAlert('success', 'Import started! Tracking progress...');
+                    this.startJobPolling();
+                } else if (result.ID) {
+                    // Fallback for different response format
+                    this.currentJobId = result.ID;
+                    this.showAlert('success', 'Import started! Tracking progress...');
+                    this.startJobPolling();
+                } else {
+                    throw new Error('No job ID returned from server');
+                }
+            } catch (error) {
+                this.showAlert('error', 'Failed to start import: ' + error.message);
+                this.isSubmitting = false;
+            }
+        },
+        
+        startJobPolling() { // Poll /models/jobs/<id> once per second until the job fails or completes.
+            if (this.jobPollInterval) {
+                clearInterval(this.jobPollInterval); // never keep two pollers running
+            }
+
+            this.jobPollInterval = setInterval(async () => {
+                if (!this.currentJobId) {
+                    clearInterval(this.jobPollInterval);
+                    return;
+                }
+
+                try {
+                    const response = await fetch(`/models/jobs/${this.currentJobId}`);
+                    if (!response.ok) {
+                        return; // non-OK status: keep polling and retry on the next tick
+                    }
+
+                    const jobData = await response.json();
+                    // Test the error first so a job reported as both finished and failed is never shown as a success.
+                    if (jobData.error) {
+                        clearInterval(this.jobPollInterval);
+                        this.isSubmitting = false;
+                        this.currentJobId = null;
+                        this.showAlert('error', 'Import failed: ' + jobData.error);
+                    } else if (jobData.completed) {
+                        clearInterval(this.jobPollInterval);
+                        this.isSubmitting = false;
+                        this.currentJobId = null;
+                        this.showAlert('success', 'Model imported successfully! Refreshing page...');
+
+                        // Refresh the page after a short delay
+                        setTimeout(() => {
+                            window.location.reload();
+                        }, 2000);
+                    }
+                } catch (error) {
+                    console.error('Error polling job status:', error); // network or parse failure: retry on the next tick
+                }
+            }, 1000);
+        },
+        
+        initializeCodeMirror() { // Create the CodeMirror YAML editor; does nothing when it already exists.
+            if (this.yamlEditor) {
+                return; // Already initialized
+            }
+            // Seed with the server-rendered .ConfigYAML when the template received one, else with the default config
+            const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
+            // Mount the editor inside the element with id yamlCodeMirror
+            this.yamlEditor = CodeMirror(document.getElementById('yamlCodeMirror'), {
+                mode: 'yaml',
+                theme: 'default',
+                lineNumbers: true,
+                autoRefresh: true,
+                indentUnit: 2,
+                tabSize: 2,
+                indentWithTabs: false,
+                lineWrapping: true,
+                styleActiveLine: true,
+                matchBrackets: true,
+                autoCloseBrackets: true,
+                value: initialValue
+            });
+        },
+        
+        bindAdvancedEvents() {
+            // Wire the advanced-mode toolbar buttons to their handlers.
+            // toggleMode() calls this every time advanced mode is re-entered, so each
+            // button is tagged once bound; otherwise click listeners would stack up
+            // and a single click could, for example, submit a save several times.
+            if (!this.yamlEditor) return;
+
+            const handlers = {
+                saveBtn: () => this.saveConfig(),
+                validateBtn: () => this.validateConfig(),
+                formatYamlBtn: () => this.formatYaml(),
+                copyYamlBtn: () => this.copyYaml()
+            };
+
+            for (const [id, handler] of Object.entries(handlers)) {
+                const button = document.getElementById(id);
+                // Skip buttons that are absent from the page or were already bound.
+                if (!button || button.dataset.advancedBound === 'true') continue;
+                button.dataset.advancedBound = 'true';
+                button.addEventListener('click', handler);
+            }
+        },
+        
+        getDefaultConfig() {
+            return `# Model Configuration
 name: my-model
 backend: llama-cpp
 parameters:
@@ -286,186 +565,150 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
 #   - chat
 #   - completion
 `;
-    }
-
-    initializeCodeMirror() {
-        const initialValue = {{if .ConfigYAML}}`{{.ConfigYAML}}`{{else}}this.getDefaultConfig(){{end}};
+        },
         
-        this.yamlEditor = CodeMirror(document.getElementById('yamlCodeMirror'), {
-            mode: 'yaml',
-            theme: 'default',
-            lineNumbers: true,
-            autoRefresh: true,
-            indentUnit: 2,
-            tabSize: 2,
-            indentWithTabs: false,
-            lineWrapping: true,
-            styleActiveLine: true,
-            matchBrackets: true,
-            autoCloseBrackets: true,
-            value: initialValue
-        });
-    }
-
-    bindEvents() {
-        // Button events
-        document.getElementById('saveBtn').addEventListener('click', () => this.saveConfig());
-        document.getElementById('validateBtn').addEventListener('click', () => this.validateConfig());
-        document.getElementById('formatYamlBtn').addEventListener('click', () => this.formatYaml());
-        document.getElementById('copyYamlBtn').addEventListener('click', () => this.copyYaml());
-    }
-
-    validateConfig() {
-        try {
-            const yamlContent = this.yamlEditor.getValue();
-            const config = jsyaml.load(yamlContent);
-            
-            if (!config || typeof config !== 'object') {
-                throw new Error('Invalid YAML structure');
-            }
-            
-            if (!config.name) {
-                throw new Error('Model name is required');
+        validateConfig() {
+            try {
+                const yamlContent = this.yamlEditor.getValue();
+                const config = jsyaml.load(yamlContent);
+                
+                if (!config || typeof config !== 'object') {
+                    throw new Error('Invalid YAML structure');
+                }
+                
+                if (!config.name) {
+                    throw new Error('Model name is required');
+                }
+                if (!config.backend) {
+                    throw new Error('Backend is required');
+                }
+                if (!config.parameters || !config.parameters.model) {
+                    throw new Error('Model file/path is required in parameters.model');
+                }
+                
+                this.showAlert('success', 'Configuration is valid!');
+            } catch (error) {
+                this.showAlert('error', 'Validation failed: ' + error.message);
             }
-            if (!config.backend) {
-                throw new Error('Backend is required');
+        },
+        
+        async saveConfig() {
+            try {
+                // Validate before saving
+                const yamlContent = this.yamlEditor.getValue();
+                const config = jsyaml.load(yamlContent);
+                
+                if (!config || typeof config !== 'object') {
+                    throw new Error('Invalid YAML structure');
+                }
+                
+                if (!config.name) {
+                    throw new Error('Model name is required');
+                }
+                if (!config.backend) {
+                    throw new Error('Backend is required');
+                }
+                if (!config.parameters || !config.parameters.model) {
+                    throw new Error('Model file/path is required in parameters.model');
+                }
+                
+                const endpoint = this.isEditMode ? `/models/edit/{{.ModelName}}` : '/models/import';
+                
+                const response = await fetch(endpoint, {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/x-yaml',
+                    },
+                    body: yamlContent
+                });
+
+                const result = await response.json();
+                
+                if (result.success) {
+                    this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
+                    if (!this.isEditMode && config.name) {
+                        setTimeout(() => {
+                            window.location.href = `/models/edit/${config.name}`;
+                        }, 2000);
+                    }
+                } else {
+                    this.showAlert('error', result.error || 'Failed to save configuration');
+                }
+            } catch (error) {
+                this.showAlert('error', 'Failed to save: ' + error.message);
             }
-            if (!config.parameters || !config.parameters.model) {
-                throw new Error('Model file/path is required in parameters.model');
+        },
+        
+        formatYaml() {
+            try {
+                const yamlContent = this.yamlEditor.getValue();
+                const parsed = jsyaml.load(yamlContent);
+                const formatted = jsyaml.dump(parsed, {
+                    indent: 2,
+                    lineWidth: 120,
+                    noRefs: true,
+                    sortKeys: false
+                });
+                this.yamlEditor.setValue(formatted);
+                this.showAlert('success', 'YAML formatted successfully');
+            } catch (error) {
+                this.showAlert('error', 'Failed to format YAML: ' + error.message);
             }
-            
-            this.showAlert('success', 'Configuration is valid!');
-        } catch (error) {
-            this.showAlert('error', 'Validation failed: ' + error.message);
-        }
-    }
-
-    async saveConfig() {
-        try {
-            // Validate before saving
+        },
+        
+        copyYaml() {
             const yamlContent = this.yamlEditor.getValue();
-            const config = jsyaml.load(yamlContent);
-            
-            if (!config || typeof config !== 'object') {
-                throw new Error('Invalid YAML structure');
-            }
-            
-            if (!config.name) {
-                throw new Error('Model name is required');
-            }
-            if (!config.backend) {
-                throw new Error('Backend is required');
-            }
-            if (!config.parameters || !config.parameters.model) {
-                throw new Error('Model file/path is required in parameters.model');
-            }
+            navigator.clipboard.writeText(yamlContent).then(() => {
+                this.showAlert('success', 'YAML copied to clipboard');
+            }).catch(err => {
+                // Fallback for older browsers
+                const textArea = document.createElement('textarea');
+                textArea.value = yamlContent;
+                document.body.appendChild(textArea);
+                textArea.select();
+                document.execCommand('copy');
+                document.body.removeChild(textArea);
+                this.showAlert('success', 'YAML copied to clipboard');
+            });
+        },
+        
+        showAlert(type, message) {
+            const container = document.getElementById('alertContainer');
+            const alertClasses = {
+                success: 'alert alert-success',
+                error: 'alert alert-error',
+                warning: 'alert alert-warning',
+                info: 'alert alert-info'
+            };
             
-            const endpoint = this.isEditMode ? `/models/edit/${this.modelName}` : '/models/import';
+            const alertIcons = {
+                success: 'fas fa-check-circle',
+                error: 'fas fa-exclamation-triangle',
+                warning: 'fas fa-exclamation-circle',
+                info: 'fas fa-info-circle'
+            };
             
-            const response = await fetch(endpoint, {
-                method: 'POST',
-                headers: {
-                    'Content-Type': 'application/x-yaml',
-                },
-                body: yamlContent
-            });
-
-            const result = await response.json();
+            container.innerHTML = `
+                <div class="${alertClasses[type]}">
+                    <div class="flex items-center">
+                        <i class="${alertIcons[type]} mr-3 text-lg"></i>
+                        <span class="flex-1"></span>
+                        <button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
+                            <i class="fas fa-times"></i>
+                        </button>
+                    </div>
+                </div>`;
+            container.querySelector('span.flex-1').textContent = message; // inserted as text: messages can carry server or parser error strings and must not be parsed as HTML
             
-            if (result.success) {
-                this.showAlert('success', result.message || (this.isEditMode ? 'Model updated successfully!' : 'Model created successfully!'));
-                if (!this.isEditMode && config.name) {
-                    setTimeout(() => {
-                        window.location.href = `/models/edit/${config.name}`;
-                    }, 2000);
-                }
-            } else {
-                this.showAlert('error', result.error || 'Failed to save configuration');
+            if (type === 'success' || type === 'info') {
+                setTimeout(() => {
+                    const alert = container.querySelector('div');
+                    if (alert) alert.remove();
+                }, 5000);
             }
-        } catch (error) {
-            this.showAlert('error', 'Failed to save: ' + error.message);
         }
     }
-
-    formatYaml() {
-        try {
-            const yamlContent = this.yamlEditor.getValue();
-            const parsed = jsyaml.load(yamlContent);
-            const formatted = jsyaml.dump(parsed, {
-                indent: 2,
-                lineWidth: 120,
-                noRefs: true,
-                sortKeys: false
-            });
-            this.yamlEditor.setValue(formatted);
-            this.showAlert('success', 'YAML formatted successfully');
-        } catch (error) {
-            this.showAlert('error', 'Failed to format YAML: ' + error.message);
-        }
-    }
-
-    copyYaml() {
-        const yamlContent = this.yamlEditor.getValue();
-        navigator.clipboard.writeText(yamlContent).then(() => {
-            this.showAlert('success', 'YAML copied to clipboard');
-        }).catch(err => {
-            // Fallback for older browsers
-            const textArea = document.createElement('textarea');
-            textArea.value = yamlContent;
-            document.body.appendChild(textArea);
-            textArea.select();
-            document.execCommand('copy');
-            document.body.removeChild(textArea);
-            this.showAlert('success', 'YAML copied to clipboard');
-        });
-    }
-
-    showAlert(type, message) {
-        const container = document.getElementById('alertContainer');
-        const alertClasses = {
-            success: 'alert alert-success',
-            error: 'alert alert-error',
-            warning: 'alert alert-warning',
-            info: 'alert alert-info'
-        };
-        
-        const alertIcons = {
-            success: 'fas fa-check-circle',
-            error: 'fas fa-exclamation-triangle',
-            warning: 'fas fa-exclamation-circle',
-            info: 'fas fa-info-circle'
-        };
-        
-        container.innerHTML = `
-            <div class="${alertClasses[type]}">
-                <div class="flex items-center">
-                    <i class="${alertIcons[type]} mr-3 text-lg"></i>
-                    <span class="flex-1">${message}</span>
-                    <button onclick="this.parentElement.parentElement.remove()" class="ml-4 text-current hover:opacity-70 transition-opacity">
-                        <i class="fas fa-times"></i>
-                    </button>
-                </div>
-            </div>
-        `;
-        
-        if (type === 'success' || type === 'info') {
-            setTimeout(() => {
-                const alert = container.querySelector('div');
-                if (alert) alert.remove();
-            }, 5000);
-        }
-    }
-
-    clearAlert() {
-        document.getElementById('alertContainer').innerHTML = '';
-    }
 }
-
-// Initialize the editor when the page loads
-let modelEditor;
-document.addEventListener('DOMContentLoaded', () => {
-    modelEditor = new ModelEditor();
-});
 </script>
 
 </body>
diff --git a/core/http/views/partials/inprogress.html b/core/http/views/partials/inprogress.html
index 318406cb0881..7ebe04927d1b 100644
--- a/core/http/views/partials/inprogress.html
+++ b/core/http/views/partials/inprogress.html
@@ -120,8 +120,17 @@ <h3 class="text-[#E5E7EB] font-semibold text-sm">
                     throw new Error('Failed to fetch operations');
                 }
                 const data = await response.json();
+                const previousCount = this.operations.length;
                 this.operations = data.operations || [];
                 
+                // If we had operations before and now we don't, refresh the page
+                if (previousCount > 0 && this.operations.length === 0) {
+                    // Small delay to ensure the user sees the completion
+                    setTimeout(() => {
+                        window.location.reload();
+                    }, 1000);
+                }
+                
                 // Auto-collapse if there are many operations
                 if (this.operations.length > 5 && !this.collapsed) {
                     // Don't auto-collapse, let user control it

From 1ad42a08d7425e5d662afe820ae6cee3045763d5 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 11 Nov 2025 21:28:23 +0100
Subject: [PATCH 03/16] Move HF api client to pkg

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .github/gallery-agent/agent.go                |   4 +-
 .github/gallery-agent/go.mod                  |  38 ----
 .github/gallery-agent/go.sum                  | 168 ------------------
 .github/gallery-agent/main.go                 |   2 +-
 .github/gallery-agent/tools.go                |   8 +-
 .github/workflows/gallery-agent.yaml          |   9 +-
 go.mod                                        |   2 +-
 .../hfapi => pkg/huggingface-api}/client.go   |   0
 .../huggingface-api}/client_test.go           |   2 +-
 .../huggingface-api}/hfapi_suite_test.go      |   2 +
 10 files changed, 12 insertions(+), 223 deletions(-)
 delete mode 100644 .github/gallery-agent/go.mod
 delete mode 100644 .github/gallery-agent/go.sum
 rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/client.go (100%)
 rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/client_test.go (99%)
 rename {.github/gallery-agent/hfapi => pkg/huggingface-api}/hfapi_suite_test.go (99%)

diff --git a/.github/gallery-agent/agent.go b/.github/gallery-agent/agent.go
index 19262669f574..687b5a1eb77f 100644
--- a/.github/gallery-agent/agent.go
+++ b/.github/gallery-agent/agent.go
@@ -7,8 +7,8 @@ import (
 	"slices"
 	"strings"
 
-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
-	"github.com/mudler/cogito"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+	cogito "github.com/mudler/cogito"
 
 	"github.com/mudler/cogito/structures"
 	"github.com/sashabaranov/go-openai/jsonschema"
diff --git a/.github/gallery-agent/go.mod b/.github/gallery-agent/go.mod
deleted file mode 100644
index 7129f5507bb4..000000000000
--- a/.github/gallery-agent/go.mod
+++ /dev/null
@@ -1,38 +0,0 @@
-module github.com/go-skynet/LocalAI/.github/gallery-agent
-
-go 1.24.1
-
-require (
-	github.com/mudler/cogito v0.3.0
-	github.com/onsi/ginkgo/v2 v2.25.3
-	github.com/onsi/gomega v1.38.2
-	github.com/sashabaranov/go-openai v1.41.2
-	github.com/tmc/langchaingo v0.1.13
-)
-
-require (
-	dario.cat/mergo v1.0.1 // indirect
-	github.com/Masterminds/goutils v1.1.1 // indirect
-	github.com/Masterminds/semver/v3 v3.4.0 // indirect
-	github.com/Masterminds/sprig/v3 v3.3.0 // indirect
-	github.com/go-logr/logr v1.4.3 // indirect
-	github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
-	github.com/google/go-cmp v0.7.0 // indirect
-	github.com/google/jsonschema-go v0.3.0 // indirect
-	github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
-	github.com/google/uuid v1.6.0 // indirect
-	github.com/huandu/xstrings v1.5.0 // indirect
-	github.com/mitchellh/copystructure v1.2.0 // indirect
-	github.com/mitchellh/reflectwalk v1.0.2 // indirect
-	github.com/modelcontextprotocol/go-sdk v1.0.0 // indirect
-	github.com/shopspring/decimal v1.4.0 // indirect
-	github.com/spf13/cast v1.7.0 // indirect
-	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
-	go.uber.org/automaxprocs v1.6.0 // indirect
-	go.yaml.in/yaml/v3 v3.0.4 // indirect
-	golang.org/x/crypto v0.41.0 // indirect
-	golang.org/x/net v0.43.0 // indirect
-	golang.org/x/sys v0.35.0 // indirect
-	golang.org/x/text v0.28.0 // indirect
-	golang.org/x/tools v0.36.0 // indirect
-)
diff --git a/.github/gallery-agent/go.sum b/.github/gallery-agent/go.sum
deleted file mode 100644
index a5488f5b60a5..000000000000
--- a/.github/gallery-agent/go.sum
+++ /dev/null
@@ -1,168 +0,0 @@
-dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
-dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
-github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
-github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
-github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
-github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
-github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
-github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
-github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs=
-github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0=
-github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
-github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
-github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
-github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
-github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
-github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
-github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
-github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
-github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
-github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
-github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
-github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A=
-github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw=
-github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA=
-github.com/cpuguy83/dockercfg v0.3.2/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
-github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
-github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw=
-github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
-github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
-github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
-github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
-github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
-github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw=
-github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
-github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
-github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
-github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
-github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
-github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
-github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
-github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
-github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
-github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
-github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
-github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
-github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
-github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
-github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
-github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
-github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/jsonschema-go v0.3.0 h1:6AH2TxVNtk3IlvkkhjrtbUc4S8AvO0Xii0DxIygDg+Q=
-github.com/google/jsonschema-go v0.3.0/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
-github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
-github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
-github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
-github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
-github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
-github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
-github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
-github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
-github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE=
-github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
-github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
-github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
-github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
-github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
-github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
-github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
-github.com/moby/go-archive v0.1.0 h1:Kk/5rdW/g+H8NHdJW2gsXyZ7UnzvJNOy6VKJqueWdcQ=
-github.com/moby/go-archive v0.1.0/go.mod h1:G9B+YoujNohJmrIYFBpSd54GTUB4lt9S+xVQvsJyFuo=
-github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk=
-github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc=
-github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
-github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
-github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
-github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
-github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
-github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
-github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
-github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
-github.com/modelcontextprotocol/go-sdk v1.0.0 h1:Z4MSjLi38bTgLrd/LjSmofqRqyBiVKRyQSJgw8q8V74=
-github.com/modelcontextprotocol/go-sdk v1.0.0/go.mod h1:nYtYQroQ2KQiM0/SbyEPUWQ6xs4B95gJjEalc9AQyOs=
-github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
-github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
-github.com/mudler/cogito v0.3.0 h1:NbVAO3bLkK5oGSY0xq87jlz8C9OIsLW55s+8Hfzeu9s=
-github.com/mudler/cogito v0.3.0/go.mod h1:abMwl+CUjCp87IufA2quZdZt0bbLaHHN79o17HbUKxU=
-github.com/onsi/ginkgo/v2 v2.25.3 h1:Ty8+Yi/ayDAGtk4XxmmfUy4GabvM+MegeB4cDLRi6nw=
-github.com/onsi/ginkgo/v2 v2.25.3/go.mod h1:43uiyQC4Ed2tkOzLsEYm7hnrb7UJTWHYNsuy3bG/snE=
-github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
-github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
-github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
-github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
-github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
-github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
-github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
-github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
-github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
-github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
-github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
-github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
-github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
-github.com/shirou/gopsutil/v4 v4.25.5 h1:rtd9piuSMGeU8g1RMXjZs9y9luK5BwtnG7dZaQUJAsc=
-github.com/shirou/gopsutil/v4 v4.25.5/go.mod h1:PfybzyydfZcN+JMMjkF6Zb8Mq1A/VcogFFg7hj50W9c=
-github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
-github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
-github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
-github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
-github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w=
-github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-github.com/testcontainers/testcontainers-go v0.38.0 h1:d7uEapLcv2P8AvH8ahLqDMMxda2W9gQN1nRbHS28HBw=
-github.com/testcontainers/testcontainers-go v0.38.0/go.mod h1:C52c9MoHpWO+C4aqmgSU+hxlR5jlEayWtgYrb8Pzz1w=
-github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
-github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
-github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
-github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
-github.com/tmc/langchaingo v0.1.13 h1:rcpMWBIi2y3B90XxfE4Ao8dhCQPVDMaNPnN5cGB1CaA=
-github.com/tmc/langchaingo v0.1.13/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
-github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
-github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
-github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
-github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
-go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
-go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI=
-go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc=
-go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
-go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
-go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
-go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
-go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
-go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
-go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
-go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
-go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
-go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
-golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
-golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
-golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
-golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
-golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
-golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
-golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
-golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
-golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
-google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
-google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
-gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/.github/gallery-agent/main.go b/.github/gallery-agent/main.go
index 78d3a6cd7612..40412f198d4f 100644
--- a/.github/gallery-agent/main.go
+++ b/.github/gallery-agent/main.go
@@ -9,7 +9,7 @@ import (
 	"strings"
 	"time"
 
-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 )
 
 // ProcessedModelFile represents a processed model file with additional metadata
diff --git a/.github/gallery-agent/tools.go b/.github/gallery-agent/tools.go
index b44c327764f9..3e2fc2f3a17c 100644
--- a/.github/gallery-agent/tools.go
+++ b/.github/gallery-agent/tools.go
@@ -3,9 +3,9 @@ package main
 import (
 	"fmt"
 
-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
-	"github.com/sashabaranov/go-openai"
-	"github.com/tmc/langchaingo/jsonschema"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
+	openai "github.com/sashabaranov/go-openai"
+	jsonschema "github.com/sashabaranov/go-openai/jsonschema"
 )
 
 // Get repository README from HF
@@ -13,7 +13,7 @@ type HFReadmeTool struct {
 	client *hfapi.Client
 }
 
-func (s *HFReadmeTool) Run(args map[string]any) (string, error) {
+func (s *HFReadmeTool) Execute(args map[string]any) (string, error) {
 	q, ok := args["repository"].(string)
 	if !ok {
 		return "", fmt.Errorf("no query")
diff --git a/.github/workflows/gallery-agent.yaml b/.github/workflows/gallery-agent.yaml
index e796d2f2cba3..4c4074bbf215 100644
--- a/.github/workflows/gallery-agent.yaml
+++ b/.github/workflows/gallery-agent.yaml
@@ -39,11 +39,6 @@ jobs:
         with:
           go-version: '1.21'
 
-      - name: Build gallery agent
-        run: |
-          cd .github/gallery-agent
-          go mod download
-          go build -o gallery-agent .
 
       - name: Run gallery agent
         env:
@@ -56,9 +51,7 @@ jobs:
           MAX_MODELS: ${{ github.event.inputs.max_models || '1' }}
         run: |
           export GALLERY_INDEX_PATH=$PWD/gallery/index.yaml
-          cd .github/gallery-agent
-          ./gallery-agent
-          rm -rf gallery-agent
+          go run ./.github/gallery-agent # "./" makes Go resolve a directory, not an import path
 
       - name: Check for changes
         id: check_changes
diff --git a/go.mod b/go.mod
index f520ff4edfed..464ec1553086 100644
--- a/go.mod
+++ b/go.mod
@@ -59,6 +59,7 @@ require (
 	go.opentelemetry.io/otel/metric v1.38.0
 	go.opentelemetry.io/otel/sdk/metric v1.38.0
 	google.golang.org/grpc v1.76.0
+	google.golang.org/protobuf v1.36.8
 	gopkg.in/yaml.v2 v2.4.0
 	gopkg.in/yaml.v3 v3.0.1
 	oras.land/oras-go/v2 v2.6.0
@@ -148,7 +149,6 @@ require (
 	golang.org/x/oauth2 v0.30.0 // indirect
 	golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053 // indirect
 	golang.org/x/time v0.12.0 // indirect
-	google.golang.org/protobuf v1.36.8 // indirect
 )
 
 require (
diff --git a/.github/gallery-agent/hfapi/client.go b/pkg/huggingface-api/client.go
similarity index 100%
rename from .github/gallery-agent/hfapi/client.go
rename to pkg/huggingface-api/client.go
diff --git a/.github/gallery-agent/hfapi/client_test.go b/pkg/huggingface-api/client_test.go
similarity index 99%
rename from .github/gallery-agent/hfapi/client_test.go
rename to pkg/huggingface-api/client_test.go
index c490d0e9deb3..37a381416125 100644
--- a/.github/gallery-agent/hfapi/client_test.go
+++ b/pkg/huggingface-api/client_test.go
@@ -8,7 +8,7 @@ import (
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 
-	"github.com/go-skynet/LocalAI/.github/gallery-agent/hfapi"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 )
 
 var _ = Describe("HuggingFace API Client", func() {
diff --git a/.github/gallery-agent/hfapi/hfapi_suite_test.go b/pkg/huggingface-api/hfapi_suite_test.go
similarity index 99%
rename from .github/gallery-agent/hfapi/hfapi_suite_test.go
rename to pkg/huggingface-api/hfapi_suite_test.go
index 6dca0c029cbe..ec4654c5a9a7 100644
--- a/.github/gallery-agent/hfapi/hfapi_suite_test.go
+++ b/pkg/huggingface-api/hfapi_suite_test.go
@@ -11,3 +11,5 @@ func TestHfapi(t *testing.T) {
 	RegisterFailHandler(Fail)
 	RunSpecs(t, "HuggingFace API Suite")
 }
+
+

From cbf567a92ba7bdb92e0f0a52c2e2b40ac525e25f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 11 Nov 2025 21:54:21 +0100
Subject: [PATCH 04/16] Add simple importer for gguf files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/gallery/importers/llama-cpp.go         | 92 +++++++++++++++++++++
 core/http/endpoints/localai/import_model.go | 25 +++++-
 2 files changed, 113 insertions(+), 4 deletions(-)
 create mode 100644 core/gallery/importers/llama-cpp.go

diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
new file mode 100644
index 000000000000..e91afe9972d8
--- /dev/null
+++ b/core/gallery/importers/llama-cpp.go
@@ -0,0 +1,92 @@
+package importers
+
+import (
+	"encoding/json"
+	"path/filepath"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/pkg/functions"
+	"go.yaml.in/yaml/v2"
+)
+
+type LlamaCPPImporter struct{}
+
+func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool {
+	preferences, err := request.Preferences.MarshalJSON()
+	if err != nil {
+		return false
+	}
+	preferencesMap := make(map[string]any)
+	err = json.Unmarshal(preferences, &preferencesMap)
+	if err != nil {
+		return false
+	}
+
+	if preferencesMap["backend"] == "llama-cpp" {
+		return true
+	}
+
+	return strings.HasSuffix(uri, ".gguf")
+}
+
+func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
+	preferences, err := request.Preferences.MarshalJSON()
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+	preferencesMap := make(map[string]any)
+	err = json.Unmarshal(preferences, &preferencesMap)
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	name, ok := preferencesMap["name"].(string)
+	if !ok {
+		name = filepath.Base(uri)
+	}
+
+	description, ok := preferencesMap["description"].(string)
+	if !ok {
+		description = "Imported from " + uri
+	}
+
+	modelConfig := config.ModelConfig{
+		Name:                name,
+		Description:         description,
+		KnownUsecaseStrings: []string{"chat"},
+		Backend:             "llama-cpp",
+		PredictionOptions: schema.PredictionOptions{
+			BasicModelRequest: schema.BasicModelRequest{
+				Model: filepath.Base(uri),
+			},
+		},
+		TemplateConfig: config.TemplateConfig{
+			UseTokenizerTemplate: true,
+		},
+		FunctionsConfig: functions.FunctionsConfig{
+			GrammarConfig: functions.GrammarConfig{
+				NoGrammar: true,
+			},
+		},
+	}
+
+	data, err := yaml.Marshal(modelConfig)
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	return gallery.ModelConfig{
+		Name:        name,
+		Description: description,
+		ConfigFile:  string(data),
+		Files: []gallery.File{
+			{
+				URI:      uri,
+				Filename: filepath.Base(uri),
+			},
+		},
+	}, nil
+}
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index e579642ed10e..eb6dbc80322c 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -11,6 +11,7 @@ import (
 	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/gallery/importers"
 	httpUtils "github.com/mudler/LocalAI/core/http/utils"
 	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
@@ -19,6 +20,15 @@ import (
 	"gopkg.in/yaml.v3"
 )
 
+type Importer interface {
+	Match(uri string, request schema.ImportModelRequest) bool
+	Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
+}
+
+var defaultImporters = []Importer{
+	&importers.LlamaCPPImporter{},
+}
+
 // ImportModelURIEndpoint handles creating new model configurations from a URI
 func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler {
 	return func(c *fiber.Ctx) error {
@@ -29,10 +39,17 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
 			return err
 		}
 
-		// From the input, we are going to extract a default settings for the model, and use the gallery service
-		// TODO: This is now all fake data
-		modelConfig := gallery.ModelConfig{
-			Name: "imported-model-name",
+		var err error
+		var modelConfig gallery.ModelConfig
+
+		for _, importer := range defaultImporters {
+			if importer.Match(input.URI, *input) {
+				modelConfig, err = importer.Import(input.URI, *input)
+				if err != nil {
+					continue
+				}
+				break
+			}
 		}
 
 		uuid, err := uuid.NewUUID()

From 0a9d6fa75d860abc59182977f9033d1f27e361fc Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 11 Nov 2025 21:58:59 +0100
Subject: [PATCH 05/16] Add opcache

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/app.go                            | 11 ++++++++---
 core/http/endpoints/localai/import_model.go | 21 +++++++++++++++++----
 core/http/routes/localai.go                 |  5 +++--
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/core/http/app.go b/core/http/app.go
index dcd9a2219958..916ea9600b81 100644
--- a/core/http/app.go
+++ b/core/http/app.go
@@ -200,11 +200,16 @@ func API(application *application.Application) (*fiber.App, error) {
 	requestExtractor := middleware.NewRequestExtractor(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
 
 	routes.RegisterElevenLabsRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
-	routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
+	
+	// Create opcache for tracking UI operations (used by both UI and LocalAI routes)
+	var opcache *services.OpCache
+	if !application.ApplicationConfig().DisableWebUI {
+		opcache = services.NewOpCache(application.GalleryService())
+	}
+	
+	routes.RegisterLocalAIRoutes(router, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
 	routes.RegisterOpenAIRoutes(router, requestExtractor, application)
 	if !application.ApplicationConfig().DisableWebUI {
-		// Create opcache for tracking UI operations
-		opcache := services.NewOpCache(application.GalleryService())
 		routes.RegisterUIAPIRoutes(router, application.ModelConfigLoader(), application.ApplicationConfig(), application.GalleryService(), opcache)
 		routes.RegisterUIRoutes(router, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService())
 	}
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index eb6dbc80322c..73e47e9b549d 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -30,7 +30,7 @@ var defaultImporters = []Importer{
 }
 
 // ImportModelURIEndpoint handles creating new model configurations from a URI
-func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService) fiber.Handler {
+func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler {
 	return func(c *fiber.Ctx) error {
 
 		input := new(schema.ImportModelRequest)
@@ -56,13 +56,26 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
 		if err != nil {
 			return err
 		}
+
+		// Determine gallery ID for tracking - use model name if available, otherwise use URI
+		galleryID := input.URI
+		if modelConfig.Name != "" {
+			galleryID = modelConfig.Name
+		}
+
+		// Register operation in opcache if available (for UI progress tracking)
+		if opcache != nil {
+			opcache.Set(galleryID, uuid.String())
+		}
+
 		galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
 			Req: gallery.GalleryModel{
 				Overrides: map[string]interface{}{},
 			},
-			ID:               uuid.String(),
-			GalleryElement:   &modelConfig,
-			BackendGalleries: appConfig.BackendGalleries,
+			ID:                 uuid.String(),
+			GalleryElementName: galleryID,
+			GalleryElement:     &modelConfig,
+			BackendGalleries:   appConfig.BackendGalleries,
 		}
 
 		return c.JSON(schema.GalleryResponse{
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 119a02b55311..5d9ae821007d 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -18,7 +18,8 @@ func RegisterLocalAIRoutes(router *fiber.App,
 	cl *config.ModelConfigLoader,
 	ml *model.ModelLoader,
 	appConfig *config.ApplicationConfig,
-	galleryService *services.GalleryService) {
+	galleryService *services.GalleryService,
+	opcache *services.OpCache) {
 
 	router.Get("/swagger/*", swagger.HandlerDefault) // default
 
@@ -58,7 +59,7 @@ func RegisterLocalAIRoutes(router *fiber.App,
 		router.Post("/models/import", localai.ImportModelEndpoint(cl, appConfig))
 
 		// URI model import endpoint
-		router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService))
+		router.Post("/models/import-uri", localai.ImportModelURIEndpoint(cl, appConfig, galleryService, opcache))
 
 		// Custom model edit endpoint
 		router.Post("/models/edit/:name", localai.EditModelEndpoint(cl, appConfig))

From 9c0fda137442aedcb065772860c2129a68086507 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 11 Nov 2025 22:22:52 +0100
Subject: [PATCH 06/16] wire importers to CLI

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/application/startup.go                 | 12 +++--
 core/cli/models.go                          | 10 +++-
 core/gallery/importers/importers.go         | 15 ++++++
 core/gallery/importers/llama-cpp.go         |  2 +
 core/http/endpoints/localai/import_model.go | 11 +---
 core/startup/model_preload.go               | 57 ++++++++++++++++++++-
 core/startup/model_preload_test.go          |  4 +-
 7 files changed, 91 insertions(+), 20 deletions(-)
 create mode 100644 core/gallery/importers/importers.go

diff --git a/core/application/startup.go b/core/application/startup.go
index a497e0e6057e..8e2387b9226f 100644
--- a/core/application/startup.go
+++ b/core/application/startup.go
@@ -22,9 +22,15 @@ func New(opts ...config.AppOption) (*Application, error) {
 
 	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath)
 	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
+
+	if err := application.start(); err != nil {
+		return nil, err
+	}
+
 	caps, err := xsysinfo.CPUCapabilities()
 	if err == nil {
 		log.Debug().Msgf("CPU capabilities: %v", caps)
+
 	}
 	gpus, err := xsysinfo.GPUs()
 	if err == nil {
@@ -56,7 +62,7 @@ func New(opts ...config.AppOption) (*Application, error) {
 		}
 	}
 
-	if err := coreStartup.InstallModels(options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
+	if err := coreStartup.InstallModels(application.GalleryService(), options.Galleries, options.BackendGalleries, options.SystemState, application.ModelLoader(), options.EnforcePredownloadScans, options.AutoloadBackendGalleries, nil, options.ModelsURL...); err != nil {
 		log.Error().Err(err).Msg("error installing models")
 	}
 
@@ -152,10 +158,6 @@ func New(opts ...config.AppOption) (*Application, error) {
 	// Watch the configuration directory
 	startWatcher(options)
 
-	if err := application.start(); err != nil {
-		return nil, err
-	}
-
 	log.Info().Msg("core/startup process completed!")
 	return application, nil
 }
diff --git a/core/cli/models.go b/core/cli/models.go
index 2b8da49c466a..dd5273317bfc 100644
--- a/core/cli/models.go
+++ b/core/cli/models.go
@@ -1,12 +1,14 @@
 package cli
 
 import (
+	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 
 	cliContext "github.com/mudler/LocalAI/core/cli/context"
 	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/services"
 
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/startup"
@@ -78,6 +80,12 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 		return err
 	}
 
+	galleryService := services.NewGalleryService(&config.ApplicationConfig{}, model.NewModelLoader(systemState, true))
+	err = galleryService.Start(context.Background(), config.NewModelConfigLoader(mi.ModelsPath), systemState)
+	if err != nil {
+		return err
+	}
+
 	var galleries []config.Gallery
 	if err := json.Unmarshal([]byte(mi.Galleries), &galleries); err != nil {
 		log.Error().Err(err).Msg("unable to load galleries")
@@ -127,7 +135,7 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error {
 		}
 
 		modelLoader := model.NewModelLoader(systemState, true)
-		err = startup.InstallModels(galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
+		err = startup.InstallModels(galleryService, galleries, backendGalleries, systemState, modelLoader, !mi.DisablePredownloadScan, mi.AutoloadBackendGalleries, progressCallback, modelName)
 		if err != nil {
 			return err
 		}
diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
new file mode 100644
index 000000000000..5236af0cc245
--- /dev/null
+++ b/core/gallery/importers/importers.go
@@ -0,0 +1,20 @@
+package importers
+
+import (
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
+)
+
+// DefaultImporters is the list of importers that callers iterate, in order,
+// to turn a model URI into a gallery.ModelConfig.
+var DefaultImporters = []Importer{
+	&LlamaCPPImporter{},
+}
+
+// Importer builds a gallery.ModelConfig from a model URI.
+type Importer interface {
+	// Match reports whether this importer should be used for uri; callers only invoke Import when it returns true.
+	Match(uri string, request schema.ImportModelRequest) bool
+	// Import returns the model configuration derived from uri, or an error.
+	Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
+}
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index e91afe9972d8..3596c868f50a 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -12,6 +12,8 @@ import (
 	"go.yaml.in/yaml/v2"
 )
 
+var _ Importer = &LlamaCPPImporter{}
+
 type LlamaCPPImporter struct{}
 
 func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool {
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index 73e47e9b549d..ea71ae53ae5d 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -20,15 +20,6 @@ import (
 	"gopkg.in/yaml.v3"
 )
 
-type Importer interface {
-	Match(uri string, request schema.ImportModelRequest) bool
-	Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
-}
-
-var defaultImporters = []Importer{
-	&importers.LlamaCPPImporter{},
-}
-
 // ImportModelURIEndpoint handles creating new model configurations from a URI
 func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) fiber.Handler {
 	return func(c *fiber.Ctx) error {
@@ -42,7 +33,7 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
 		var err error
 		var modelConfig gallery.ModelConfig
 
-		for _, importer := range defaultImporters {
+		for _, importer := range importers.DefaultImporters {
 			if importer.Match(input.URI, *input) {
 				modelConfig, err = importer.Import(input.URI, *input)
 				if err != nil {
diff --git a/core/startup/model_preload.go b/core/startup/model_preload.go
index a98e0592bd80..4550a25e46f3 100644
--- a/core/startup/model_preload.go
+++ b/core/startup/model_preload.go
@@ -7,9 +7,14 @@ import (
 	"path"
 	"path/filepath"
 	"strings"
+	"time"
 
+	"github.com/google/uuid"
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	"github.com/mudler/LocalAI/core/schema"
+	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/model"
 	"github.com/mudler/LocalAI/pkg/system"
@@ -25,7 +30,7 @@ const (
 // InstallModels will preload models from the given list of URLs and galleries
 // It will download the model if it is not already present in the model path
 // It will also try to resolve if the model is an embedded model YAML configuration
-func InstallModels(galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
+func InstallModels(galleryService *services.GalleryService, galleries, backendGalleries []config.Gallery, systemState *system.SystemState, modelLoader *model.ModelLoader, enforceScan, autoloadBackendGalleries bool, downloadStatus func(string, string, string, float64), models ...string) error {
 	// create an error that groups all errors
 	var err error
 
@@ -154,7 +159,71 @@ func InstallModels(galleries, backendGalleries []config.Gallery, systemState *sy
 					err = errors.Join(err, e)
 				} else if !found {
 					log.Warn().Msgf("[startup] failed resolving model '%s'", url)
-					err = errors.Join(err, fmt.Errorf("failed resolving model '%s'", url))
+
+					if galleryService == nil {
+						err = errors.Join(err, fmt.Errorf("cannot start autoimporter, not sure how to handle this uri"))
+						continue
+					}
+
+					// Fall back to the auto-importers: take the first importer that
+					// both matches the URI and imports it successfully.
+					var (
+						modelConfig gallery.ModelConfig
+						importErr   error
+						imported    bool
+					)
+					for _, importer := range importers.DefaultImporters {
+						if !importer.Match(url, schema.ImportModelRequest{}) {
+							continue
+						}
+						cfg, impErr := importer.Import(url, schema.ImportModelRequest{})
+						if impErr != nil {
+							// Remember the failure and let the next importer try.
+							importErr = errors.Join(importErr, impErr)
+							continue
+						}
+						modelConfig, imported = cfg, true
+						break
+					}
+					if !imported {
+						// No importer produced a config: record the failure instead of
+						// enqueueing an empty model, and move on to the next URL.
+						err = errors.Join(err, fmt.Errorf("failed resolving model '%s'", url), importErr)
+						continue
+					}
+
+					opID, uuidErr := uuid.NewUUID()
+					if uuidErr != nil {
+						return uuidErr
+					}
+
+					galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{
+						Req: gallery.GalleryModel{
+							Overrides: map[string]interface{}{},
+						},
+						ID:                 opID.String(),
+						GalleryElementName: modelConfig.Name,
+						GalleryElement:     &modelConfig,
+						BackendGalleries:   backendGalleries,
+					}
+
+					// Poll until the gallery service marks the operation as processed.
+					// NOTE(review): there is no timeout or cancellation here; this
+					// blocks forever if the operation is never processed.
+					var status *services.GalleryOpStatus
+					for {
+						status = galleryService.GetStatus(opID.String())
+						if status != nil && status.Processed {
+							break
+						}
+						time.Sleep(1 * time.Second)
+					}
+
+					if status.Error != nil {
+						return status.Error
+					}
+
+					log.Info().Msgf("[startup] imported model '%s' from '%s'", modelConfig.Name, url)
 				}
 			}
 		}
diff --git a/core/startup/model_preload_test.go b/core/startup/model_preload_test.go
index 16096f41befa..255c3dc4e65a 100644
--- a/core/startup/model_preload_test.go
+++ b/core/startup/model_preload_test.go
@@ -33,7 +33,7 @@ var _ = Describe("Preload test", func() {
 			url := "https://raw.githubusercontent.com/mudler/LocalAI-examples/main/configurations/phi-2.yaml"
 			fileName := fmt.Sprintf("%s.yaml", "phi-2")
 
-			InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
+			InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
 
 			resultFile := filepath.Join(tmpdir, fileName)
 
@@ -46,7 +46,7 @@ var _ = Describe("Preload test", func() {
 			url := "huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
 			fileName := fmt.Sprintf("%s.gguf", "tinyllama-1.1b-chat-v0.3.Q2_K")
 
-			err := InstallModels([]config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
+			err := InstallModels(nil, []config.Gallery{}, []config.Gallery{}, systemState, ml, true, true, nil, url)
 			Expect(err).ToNot(HaveOccurred())
 
 			resultFile := filepath.Join(tmpdir, fileName)

From af6070ac602235dc371d8ca95a216623fa8725cf Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Tue, 11 Nov 2025 22:30:39 +0100
Subject: [PATCH 07/16] Add omitempty to config fields

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/config/model_config.go | 292 ++++++++++++++++++------------------
 core/schema/prediction.go   |  49 +++---
 core/schema/request.go      |   3 +-
 pkg/functions/parse.go      |  59 ++++----
 4 files changed, 208 insertions(+), 195 deletions(-)

diff --git a/core/config/model_config.go b/core/config/model_config.go
index 1dee82363d2f..41664f2a3dd4 100644
--- a/core/config/model_config.go
+++ b/core/config/model_config.go
@@ -16,30 +16,31 @@ const (
 	RAND_SEED = -1
 )
 
+// @Description TTS configuration
 type TTSConfig struct {
 
 	// Voice wav path or id
-	Voice string `yaml:"voice" json:"voice"`
+	Voice string `yaml:"voice,omitempty" json:"voice,omitempty"`
 
-	AudioPath string `yaml:"audio_path" json:"audio_path"`
+	AudioPath string `yaml:"audio_path,omitempty" json:"audio_path,omitempty"`
 }
 
-// ModelConfig represents a model configuration
+// @Description ModelConfig represents a model configuration
 type ModelConfig struct {
 	modelConfigFile          string `yaml:"-" json:"-"`
-	schema.PredictionOptions `yaml:"parameters" json:"parameters"`
-	Name                     string `yaml:"name" json:"name"`
-
-	F16                 *bool                `yaml:"f16" json:"f16"`
-	Threads             *int                 `yaml:"threads" json:"threads"`
-	Debug               *bool                `yaml:"debug" json:"debug"`
-	Roles               map[string]string    `yaml:"roles" json:"roles"`
-	Embeddings          *bool                `yaml:"embeddings" json:"embeddings"`
-	Backend             string               `yaml:"backend" json:"backend"`
-	TemplateConfig      TemplateConfig       `yaml:"template" json:"template"`
-	KnownUsecaseStrings []string             `yaml:"known_usecases" json:"known_usecases"`
+	schema.PredictionOptions `yaml:"parameters,omitempty" json:"parameters,omitempty"`
+	Name                     string `yaml:"name,omitempty" json:"name,omitempty"`
+
+	F16                 *bool                `yaml:"f16,omitempty" json:"f16,omitempty"`
+	Threads             *int                 `yaml:"threads,omitempty" json:"threads,omitempty"`
+	Debug               *bool                `yaml:"debug,omitempty" json:"debug,omitempty"`
+	Roles               map[string]string    `yaml:"roles,omitempty" json:"roles,omitempty"`
+	Embeddings          *bool                `yaml:"embeddings,omitempty" json:"embeddings,omitempty"`
+	Backend             string               `yaml:"backend,omitempty" json:"backend,omitempty"`
+	TemplateConfig      TemplateConfig       `yaml:"template,omitempty" json:"template,omitempty"`
+	KnownUsecaseStrings []string             `yaml:"known_usecases,omitempty" json:"known_usecases,omitempty"`
 	KnownUsecases       *ModelConfigUsecases `yaml:"-" json:"-"`
-	Pipeline            Pipeline             `yaml:"pipeline" json:"pipeline"`
+	Pipeline            Pipeline             `yaml:"pipeline,omitempty" json:"pipeline,omitempty"`
 
 	PromptStrings, InputStrings                []string               `yaml:"-" json:"-"`
 	InputToken                                 [][]int                `yaml:"-" json:"-"`
@@ -47,50 +48,52 @@ type ModelConfig struct {
 	ResponseFormat                             string                 `yaml:"-" json:"-"`
 	ResponseFormatMap                          map[string]interface{} `yaml:"-" json:"-"`
 
-	FunctionsConfig functions.FunctionsConfig `yaml:"function" json:"function"`
+	FunctionsConfig functions.FunctionsConfig `yaml:"function,omitempty" json:"function,omitempty"`
 
-	FeatureFlag FeatureFlag `yaml:"feature_flags" json:"feature_flags"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
+	FeatureFlag FeatureFlag `yaml:"feature_flags,omitempty" json:"feature_flags,omitempty"` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
 	// LLM configs (GPT4ALL, Llama.cpp, ...)
 	LLMConfig `yaml:",inline" json:",inline"`
 
 	// Diffusers
-	Diffusers Diffusers `yaml:"diffusers" json:"diffusers"`
-	Step      int       `yaml:"step" json:"step"`
+	Diffusers Diffusers `yaml:"diffusers,omitempty" json:"diffusers,omitempty"`
+	Step      int       `yaml:"step,omitempty" json:"step,omitempty"`
 
 	// GRPC Options
-	GRPC GRPC `yaml:"grpc" json:"grpc"`
+	GRPC GRPC `yaml:"grpc,omitempty" json:"grpc,omitempty"`
 
 	// TTS specifics
-	TTSConfig `yaml:"tts" json:"tts"`
+	TTSConfig `yaml:"tts,omitempty" json:"tts,omitempty"`
 
 	// CUDA
 	// Explicitly enable CUDA or not (some backends might need it)
-	CUDA bool `yaml:"cuda" json:"cuda"`
+	CUDA bool `yaml:"cuda,omitempty" json:"cuda,omitempty"`
 
-	DownloadFiles []File `yaml:"download_files" json:"download_files"`
+	DownloadFiles []File `yaml:"download_files,omitempty" json:"download_files,omitempty"`
 
-	Description string `yaml:"description" json:"description"`
-	Usage       string `yaml:"usage" json:"usage"`
+	Description string `yaml:"description,omitempty" json:"description,omitempty"`
+	Usage       string `yaml:"usage,omitempty" json:"usage,omitempty"`
 
-	Options   []string `yaml:"options" json:"options"`
-	Overrides []string `yaml:"overrides" json:"overrides"`
+	Options   []string `yaml:"options,omitempty" json:"options,omitempty"`
+	Overrides []string `yaml:"overrides,omitempty" json:"overrides,omitempty"`
 
-	MCP   MCPConfig   `yaml:"mcp" json:"mcp"`
-	Agent AgentConfig `yaml:"agent" json:"agent"`
+	MCP   MCPConfig   `yaml:"mcp,omitempty" json:"mcp,omitempty"`
+	Agent AgentConfig `yaml:"agent,omitempty" json:"agent,omitempty"`
 }
 
+// @Description MCP configuration
 type MCPConfig struct {
-	Servers string `yaml:"remote" json:"remote"`
-	Stdio   string `yaml:"stdio" json:"stdio"`
+	Servers string `yaml:"remote,omitempty" json:"remote,omitempty"`
+	Stdio   string `yaml:"stdio,omitempty" json:"stdio,omitempty"`
 }
 
+// @Description Agent configuration
 type AgentConfig struct {
-	MaxAttempts           int  `yaml:"max_attempts" json:"max_attempts"`
-	MaxIterations         int  `yaml:"max_iterations" json:"max_iterations"`
-	EnableReasoning       bool `yaml:"enable_reasoning" json:"enable_reasoning"`
-	EnablePlanning        bool `yaml:"enable_planning" json:"enable_planning"`
-	EnableMCPPrompts      bool `yaml:"enable_mcp_prompts" json:"enable_mcp_prompts"`
-	EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator" json:"enable_plan_re_evaluator"`
+	MaxAttempts           int  `yaml:"max_attempts,omitempty" json:"max_attempts,omitempty"`
+	MaxIterations         int  `yaml:"max_iterations,omitempty" json:"max_iterations,omitempty"`
+	EnableReasoning       bool `yaml:"enable_reasoning,omitempty" json:"enable_reasoning,omitempty"`
+	EnablePlanning        bool `yaml:"enable_planning,omitempty" json:"enable_planning,omitempty"`
+	EnableMCPPrompts      bool `yaml:"enable_mcp_prompts,omitempty" json:"enable_mcp_prompts,omitempty"`
+	EnablePlanReEvaluator bool `yaml:"enable_plan_re_evaluator,omitempty" json:"enable_plan_re_evaluator,omitempty"`
 }
 
 func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCPGenericConfig[MCPSTDIOServers], error) {
@@ -107,35 +110,39 @@ func (c *MCPConfig) MCPConfigFromYAML() (MCPGenericConfig[MCPRemoteServers], MCP
 	return remote, stdio, nil
 }
 
+// @Description MCP generic configuration
 type MCPGenericConfig[T any] struct {
-	Servers T `yaml:"mcpServers" json:"mcpServers"`
+	Servers T `yaml:"mcpServers,omitempty" json:"mcpServers,omitempty"`
 }
 type MCPRemoteServers map[string]MCPRemoteServer
 type MCPSTDIOServers map[string]MCPSTDIOServer
 
+// @Description MCP remote server configuration
 type MCPRemoteServer struct {
-	URL   string `json:"url"`
-	Token string `json:"token"`
+	URL   string `json:"url,omitempty"`
+	Token string `json:"token,omitempty"`
 }
 
+// @Description MCP STDIO server configuration
 type MCPSTDIOServer struct {
-	Args    []string          `json:"args"`
-	Env     map[string]string `json:"env"`
-	Command string            `json:"command"`
+	Args    []string          `json:"args,omitempty"`
+	Env     map[string]string `json:"env,omitempty"`
+	Command string            `json:"command,omitempty"`
 }
 
-// Pipeline defines other models to use for audio-to-audio
+// @Description Pipeline defines other models to use for audio-to-audio
 type Pipeline struct {
-	TTS           string `yaml:"tts" json:"tts"`
-	LLM           string `yaml:"llm" json:"llm"`
-	Transcription string `yaml:"transcription" json:"transcription"`
-	VAD           string `yaml:"vad" json:"vad"`
+	TTS           string `yaml:"tts,omitempty" json:"tts,omitempty"`
+	LLM           string `yaml:"llm,omitempty" json:"llm,omitempty"`
+	Transcription string `yaml:"transcription,omitempty" json:"transcription,omitempty"`
+	VAD           string `yaml:"vad,omitempty" json:"vad,omitempty"`
 }
 
+// @Description File configuration for model downloads
 type File struct {
-	Filename string         `yaml:"filename" json:"filename"`
-	SHA256   string         `yaml:"sha256" json:"sha256"`
-	URI      downloader.URI `yaml:"uri" json:"uri"`
+	Filename string         `yaml:"filename,omitempty" json:"filename,omitempty"`
+	SHA256   string         `yaml:"sha256,omitempty" json:"sha256,omitempty"`
+	URI      downloader.URI `yaml:"uri,omitempty" json:"uri,omitempty"`
 }
 
 type FeatureFlag map[string]*bool
@@ -147,124 +154,125 @@ func (ff FeatureFlag) Enabled(s string) bool {
 	return false
 }
 
+// @Description GRPC configuration
 type GRPC struct {
-	Attempts          int `yaml:"attempts" json:"attempts"`
-	AttemptsSleepTime int `yaml:"attempts_sleep_time" json:"attempts_sleep_time"`
+	Attempts          int `yaml:"attempts,omitempty" json:"attempts,omitempty"`
+	AttemptsSleepTime int `yaml:"attempts_sleep_time,omitempty" json:"attempts_sleep_time,omitempty"`
 }
 
+// @Description Diffusers configuration
 type Diffusers struct {
-	CUDA             bool   `yaml:"cuda" json:"cuda"`
-	PipelineType     string `yaml:"pipeline_type" json:"pipeline_type"`
-	SchedulerType    string `yaml:"scheduler_type" json:"scheduler_type"`
-	EnableParameters string `yaml:"enable_parameters" json:"enable_parameters"` // A list of comma separated parameters to specify
-	IMG2IMG          bool   `yaml:"img2img" json:"img2img"`                     // Image to Image Diffuser
-	ClipSkip         int    `yaml:"clip_skip" json:"clip_skip"`                 // Skip every N frames
-	ClipModel        string `yaml:"clip_model" json:"clip_model"`               // Clip model to use
-	ClipSubFolder    string `yaml:"clip_subfolder" json:"clip_subfolder"`       // Subfolder to use for clip model
-	ControlNet       string `yaml:"control_net" json:"control_net"`
-}
-
-// LLMConfig is a struct that holds the configuration that are
-// generic for most of the LLM backends.
+	CUDA             bool   `yaml:"cuda,omitempty" json:"cuda,omitempty"`
+	PipelineType     string `yaml:"pipeline_type,omitempty" json:"pipeline_type,omitempty"`
+	SchedulerType    string `yaml:"scheduler_type,omitempty" json:"scheduler_type,omitempty"`
+	EnableParameters string `yaml:"enable_parameters,omitempty" json:"enable_parameters,omitempty"` // A list of comma separated parameters to specify
+	IMG2IMG          bool   `yaml:"img2img,omitempty" json:"img2img,omitempty"`                     // Image to Image Diffuser
+	ClipSkip         int    `yaml:"clip_skip,omitempty" json:"clip_skip,omitempty"`                 // Skip every N frames
+	ClipModel        string `yaml:"clip_model,omitempty" json:"clip_model,omitempty"`               // Clip model to use
+	ClipSubFolder    string `yaml:"clip_subfolder,omitempty" json:"clip_subfolder,omitempty"`       // Subfolder to use for clip model
+	ControlNet       string `yaml:"control_net,omitempty" json:"control_net,omitempty"`
+}
+
+// @Description LLMConfig is a struct that holds the configuration that are generic for most of the LLM backends.
 type LLMConfig struct {
-	SystemPrompt    string   `yaml:"system_prompt" json:"system_prompt"`
-	TensorSplit     string   `yaml:"tensor_split" json:"tensor_split"`
-	MainGPU         string   `yaml:"main_gpu" json:"main_gpu"`
-	RMSNormEps      float32  `yaml:"rms_norm_eps" json:"rms_norm_eps"`
-	NGQA            int32    `yaml:"ngqa" json:"ngqa"`
-	PromptCachePath string   `yaml:"prompt_cache_path" json:"prompt_cache_path"`
-	PromptCacheAll  bool     `yaml:"prompt_cache_all" json:"prompt_cache_all"`
-	PromptCacheRO   bool     `yaml:"prompt_cache_ro" json:"prompt_cache_ro"`
-	MirostatETA     *float64 `yaml:"mirostat_eta" json:"mirostat_eta"`
-	MirostatTAU     *float64 `yaml:"mirostat_tau" json:"mirostat_tau"`
-	Mirostat        *int     `yaml:"mirostat" json:"mirostat"`
-	NGPULayers      *int     `yaml:"gpu_layers" json:"gpu_layers"`
-	MMap            *bool    `yaml:"mmap" json:"mmap"`
-	MMlock          *bool    `yaml:"mmlock" json:"mmlock"`
-	LowVRAM         *bool    `yaml:"low_vram" json:"low_vram"`
-	Reranking       *bool    `yaml:"reranking" json:"reranking"`
-	Grammar         string   `yaml:"grammar" json:"grammar"`
-	StopWords       []string `yaml:"stopwords" json:"stopwords"`
-	Cutstrings      []string `yaml:"cutstrings" json:"cutstrings"`
-	ExtractRegex    []string `yaml:"extract_regex" json:"extract_regex"`
-	TrimSpace       []string `yaml:"trimspace" json:"trimspace"`
-	TrimSuffix      []string `yaml:"trimsuffix" json:"trimsuffix"`
-
-	ContextSize          *int             `yaml:"context_size" json:"context_size"`
-	NUMA                 bool             `yaml:"numa" json:"numa"`
-	LoraAdapter          string           `yaml:"lora_adapter" json:"lora_adapter"`
-	LoraBase             string           `yaml:"lora_base" json:"lora_base"`
-	LoraAdapters         []string         `yaml:"lora_adapters" json:"lora_adapters"`
-	LoraScales           []float32        `yaml:"lora_scales" json:"lora_scales"`
-	LoraScale            float32          `yaml:"lora_scale" json:"lora_scale"`
-	NoMulMatQ            bool             `yaml:"no_mulmatq" json:"no_mulmatq"`
-	DraftModel           string           `yaml:"draft_model" json:"draft_model"`
-	NDraft               int32            `yaml:"n_draft" json:"n_draft"`
-	Quantization         string           `yaml:"quantization" json:"quantization"`
-	LoadFormat           string           `yaml:"load_format" json:"load_format"`
-	GPUMemoryUtilization float32          `yaml:"gpu_memory_utilization" json:"gpu_memory_utilization"` // vLLM
-	TrustRemoteCode      bool             `yaml:"trust_remote_code" json:"trust_remote_code"`           // vLLM
-	EnforceEager         bool             `yaml:"enforce_eager" json:"enforce_eager"`                   // vLLM
-	SwapSpace            int              `yaml:"swap_space" json:"swap_space"`                         // vLLM
-	MaxModelLen          int              `yaml:"max_model_len" json:"max_model_len"`                   // vLLM
-	TensorParallelSize   int              `yaml:"tensor_parallel_size" json:"tensor_parallel_size"`     // vLLM
-	DisableLogStatus     bool             `yaml:"disable_log_stats" json:"disable_log_stats"`           // vLLM
-	DType                string           `yaml:"dtype" json:"dtype"`                                   // vLLM
-	LimitMMPerPrompt     LimitMMPerPrompt `yaml:"limit_mm_per_prompt" json:"limit_mm_per_prompt"`       // vLLM
-	MMProj               string           `yaml:"mmproj" json:"mmproj"`
-
-	FlashAttention *string `yaml:"flash_attention" json:"flash_attention"`
-	NoKVOffloading bool    `yaml:"no_kv_offloading" json:"no_kv_offloading"`
-	CacheTypeK     string  `yaml:"cache_type_k" json:"cache_type_k"`
-	CacheTypeV     string  `yaml:"cache_type_v" json:"cache_type_v"`
-
-	RopeScaling string `yaml:"rope_scaling" json:"rope_scaling"`
-	ModelType   string `yaml:"type" json:"type"`
-
-	YarnExtFactor  float32 `yaml:"yarn_ext_factor" json:"yarn_ext_factor"`
-	YarnAttnFactor float32 `yaml:"yarn_attn_factor" json:"yarn_attn_factor"`
-	YarnBetaFast   float32 `yaml:"yarn_beta_fast" json:"yarn_beta_fast"`
-	YarnBetaSlow   float32 `yaml:"yarn_beta_slow" json:"yarn_beta_slow"`
-
-	CFGScale float32 `yaml:"cfg_scale" json:"cfg_scale"` // Classifier-Free Guidance Scale
-}
-
-// LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
+	SystemPrompt    string   `yaml:"system_prompt,omitempty" json:"system_prompt,omitempty"`
+	TensorSplit     string   `yaml:"tensor_split,omitempty" json:"tensor_split,omitempty"`
+	MainGPU         string   `yaml:"main_gpu,omitempty" json:"main_gpu,omitempty"`
+	RMSNormEps      float32  `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"`
+	NGQA            int32    `yaml:"ngqa,omitempty" json:"ngqa,omitempty"`
+	PromptCachePath string   `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"`
+	PromptCacheAll  bool     `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
+	PromptCacheRO   bool     `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"`
+	MirostatETA     *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"`
+	MirostatTAU     *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"`
+	Mirostat        *int     `yaml:"mirostat,omitempty" json:"mirostat,omitempty"`
+	NGPULayers      *int     `yaml:"gpu_layers,omitempty" json:"gpu_layers,omitempty"`
+	MMap            *bool    `yaml:"mmap,omitempty" json:"mmap,omitempty"`
+	MMlock          *bool    `yaml:"mmlock,omitempty" json:"mmlock,omitempty"`
+	LowVRAM         *bool    `yaml:"low_vram,omitempty" json:"low_vram,omitempty"`
+	Reranking       *bool    `yaml:"reranking,omitempty" json:"reranking,omitempty"`
+	Grammar         string   `yaml:"grammar,omitempty" json:"grammar,omitempty"`
+	StopWords       []string `yaml:"stopwords,omitempty" json:"stopwords,omitempty"`
+	Cutstrings      []string `yaml:"cutstrings,omitempty" json:"cutstrings,omitempty"`
+	ExtractRegex    []string `yaml:"extract_regex,omitempty" json:"extract_regex,omitempty"`
+	TrimSpace       []string `yaml:"trimspace,omitempty" json:"trimspace,omitempty"`
+	TrimSuffix      []string `yaml:"trimsuffix,omitempty" json:"trimsuffix,omitempty"`
+
+	ContextSize          *int             `yaml:"context_size,omitempty" json:"context_size,omitempty"`
+	NUMA                 bool             `yaml:"numa,omitempty" json:"numa,omitempty"`
+	LoraAdapter          string           `yaml:"lora_adapter,omitempty" json:"lora_adapter,omitempty"`
+	LoraBase             string           `yaml:"lora_base,omitempty" json:"lora_base,omitempty"`
+	LoraAdapters         []string         `yaml:"lora_adapters,omitempty" json:"lora_adapters,omitempty"`
+	LoraScales           []float32        `yaml:"lora_scales,omitempty" json:"lora_scales,omitempty"`
+	LoraScale            float32          `yaml:"lora_scale,omitempty" json:"lora_scale,omitempty"`
+	NoMulMatQ            bool             `yaml:"no_mulmatq,omitempty" json:"no_mulmatq,omitempty"`
+	DraftModel           string           `yaml:"draft_model,omitempty" json:"draft_model,omitempty"`
+	NDraft               int32            `yaml:"n_draft,omitempty" json:"n_draft,omitempty"`
+	Quantization         string           `yaml:"quantization,omitempty" json:"quantization,omitempty"`
+	LoadFormat           string           `yaml:"load_format,omitempty" json:"load_format,omitempty"`
+	GPUMemoryUtilization float32          `yaml:"gpu_memory_utilization,omitempty" json:"gpu_memory_utilization,omitempty"` // vLLM
+	TrustRemoteCode      bool             `yaml:"trust_remote_code,omitempty" json:"trust_remote_code,omitempty"`           // vLLM
+	EnforceEager         bool             `yaml:"enforce_eager,omitempty" json:"enforce_eager,omitempty"`                   // vLLM
+	SwapSpace            int              `yaml:"swap_space,omitempty" json:"swap_space,omitempty"`                         // vLLM
+	MaxModelLen          int              `yaml:"max_model_len,omitempty" json:"max_model_len,omitempty"`                   // vLLM
+	TensorParallelSize   int              `yaml:"tensor_parallel_size,omitempty" json:"tensor_parallel_size,omitempty"`     // vLLM
+	DisableLogStatus     bool             `yaml:"disable_log_stats,omitempty" json:"disable_log_stats,omitempty"`           // vLLM
+	DType                string           `yaml:"dtype,omitempty" json:"dtype,omitempty"`                                   // vLLM
+	LimitMMPerPrompt     LimitMMPerPrompt `yaml:"limit_mm_per_prompt,omitempty" json:"limit_mm_per_prompt,omitempty"`       // vLLM
+	MMProj               string           `yaml:"mmproj,omitempty" json:"mmproj,omitempty"`
+
+	FlashAttention *string `yaml:"flash_attention,omitempty" json:"flash_attention,omitempty"`
+	NoKVOffloading bool    `yaml:"no_kv_offloading,omitempty" json:"no_kv_offloading,omitempty"`
+	CacheTypeK     string  `yaml:"cache_type_k,omitempty" json:"cache_type_k,omitempty"`
+	CacheTypeV     string  `yaml:"cache_type_v,omitempty" json:"cache_type_v,omitempty"`
+
+	RopeScaling string `yaml:"rope_scaling,omitempty" json:"rope_scaling,omitempty"`
+	ModelType   string `yaml:"type,omitempty" json:"type,omitempty"`
+
+	YarnExtFactor  float32 `yaml:"yarn_ext_factor,omitempty" json:"yarn_ext_factor,omitempty"`
+	YarnAttnFactor float32 `yaml:"yarn_attn_factor,omitempty" json:"yarn_attn_factor,omitempty"`
+	YarnBetaFast   float32 `yaml:"yarn_beta_fast,omitempty" json:"yarn_beta_fast,omitempty"`
+	YarnBetaSlow   float32 `yaml:"yarn_beta_slow,omitempty" json:"yarn_beta_slow,omitempty"`
+
+	CFGScale float32 `yaml:"cfg_scale,omitempty" json:"cfg_scale,omitempty"` // Classifier-Free Guidance Scale
+}
+
+// @Description LimitMMPerPrompt is a struct that holds the configuration for the limit-mm-per-prompt config in vLLM
 type LimitMMPerPrompt struct {
-	LimitImagePerPrompt int `yaml:"image" json:"image"`
-	LimitVideoPerPrompt int `yaml:"video" json:"video"`
-	LimitAudioPerPrompt int `yaml:"audio" json:"audio"`
+	LimitImagePerPrompt int `yaml:"image,omitempty" json:"image,omitempty"`
+	LimitVideoPerPrompt int `yaml:"video,omitempty" json:"video,omitempty"`
+	LimitAudioPerPrompt int `yaml:"audio,omitempty" json:"audio,omitempty"`
 }
 
-// TemplateConfig is a struct that holds the configuration of the templating system
+// @Description TemplateConfig is a struct that holds the configuration of the templating system
 type TemplateConfig struct {
 	// Chat is the template used in the chat completion endpoint
-	Chat string `yaml:"chat" json:"chat"`
+	Chat string `yaml:"chat,omitempty" json:"chat,omitempty"`
 
 	// ChatMessage is the template used for chat messages
-	ChatMessage string `yaml:"chat_message" json:"chat_message"`
+	ChatMessage string `yaml:"chat_message,omitempty" json:"chat_message,omitempty"`
 
 	// Completion is the template used for completion requests
-	Completion string `yaml:"completion" json:"completion"`
+	Completion string `yaml:"completion,omitempty" json:"completion,omitempty"`
 
 	// Edit is the template used for edit completion requests
-	Edit string `yaml:"edit" json:"edit"`
+	Edit string `yaml:"edit,omitempty" json:"edit,omitempty"`
 
 	// Functions is the template used when tools are present in the client requests
-	Functions string `yaml:"function" json:"function"`
+	Functions string `yaml:"function,omitempty" json:"function,omitempty"`
 
 	// UseTokenizerTemplate is a flag that indicates if the tokenizer template should be used.
 	// Note: this is mostly consumed for backends such as vllm and transformers
 	// that can use the tokenizers specified in the JSON config files of the models
-	UseTokenizerTemplate bool `yaml:"use_tokenizer_template" json:"use_tokenizer_template"`
+	UseTokenizerTemplate bool `yaml:"use_tokenizer_template,omitempty" json:"use_tokenizer_template,omitempty"`
 
 	// JoinChatMessagesByCharacter is a string that will be used to join chat messages together.
 	// It defaults to \n
-	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character" json:"join_chat_messages_by_character"`
+	JoinChatMessagesByCharacter *string `yaml:"join_chat_messages_by_character,omitempty" json:"join_chat_messages_by_character,omitempty"`
 
-	Multimodal string `yaml:"multimodal" json:"multimodal"`
+	Multimodal string `yaml:"multimodal,omitempty" json:"multimodal,omitempty"`
 
-	ReplyPrefix string `yaml:"reply_prefix" json:"reply_prefix"`
+	ReplyPrefix string `yaml:"reply_prefix,omitempty" json:"reply_prefix,omitempty"`
 }
 
 func (c *ModelConfig) syncKnownUsecasesFromString() {
diff --git a/core/schema/prediction.go b/core/schema/prediction.go
index a75c7ab160af..40345ba50b14 100644
--- a/core/schema/prediction.go
+++ b/core/schema/prediction.go
@@ -1,50 +1,51 @@
 package schema
 
+// @Description PredictionOptions contains prediction parameters for model inference
 type PredictionOptions struct {
 
 	// Also part of the OpenAI official spec
 	BasicModelRequest `yaml:",inline"`
 
 	// Also part of the OpenAI official spec
-	Language string `json:"language"`
+	Language string `json:"language,omitempty" yaml:"language,omitempty"`
 
 	// Only for audio transcription
-	Translate bool `json:"translate"`
+	Translate bool `json:"translate,omitempty" yaml:"translate,omitempty"`
 
 	// Also part of the OpenAI official spec. use it for returning multiple results
-	N int `json:"n"`
+	N int `json:"n,omitempty" yaml:"n,omitempty"`
 
 	// Common options between all the API calls, part of the OpenAI spec
-	TopP        *float64 `json:"top_p" yaml:"top_p"`
-	TopK        *int     `json:"top_k" yaml:"top_k"`
-	Temperature *float64 `json:"temperature" yaml:"temperature"`
-	Maxtokens   *int     `json:"max_tokens" yaml:"max_tokens"`
-	Echo        bool     `json:"echo"`
+	TopP        *float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
+	TopK        *int     `json:"top_k,omitempty" yaml:"top_k,omitempty"`
+	Temperature *float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
+	Maxtokens   *int     `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
+	Echo        bool     `json:"echo,omitempty" yaml:"echo,omitempty"`
 
 	// Custom parameters - not present in the OpenAI API
-	Batch         int     `json:"batch" yaml:"batch"`
-	IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
-	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
+	Batch         int     `json:"batch,omitempty" yaml:"batch,omitempty"`
+	IgnoreEOS     bool    `json:"ignore_eos,omitempty" yaml:"ignore_eos,omitempty"`
+	RepeatPenalty float64 `json:"repeat_penalty,omitempty" yaml:"repeat_penalty,omitempty"`
 
-	RepeatLastN int `json:"repeat_last_n" yaml:"repeat_last_n"`
+	RepeatLastN int `json:"repeat_last_n,omitempty" yaml:"repeat_last_n,omitempty"`
 
-	Keep int `json:"n_keep" yaml:"n_keep"`
+	Keep int `json:"n_keep,omitempty" yaml:"n_keep,omitempty"`
 
-	FrequencyPenalty float64  `json:"frequency_penalty" yaml:"frequency_penalty"`
-	PresencePenalty  float64  `json:"presence_penalty" yaml:"presence_penalty"`
-	TFZ              *float64 `json:"tfz" yaml:"tfz"`
+	FrequencyPenalty float64  `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
+	PresencePenalty  float64  `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
+	TFZ              *float64 `json:"tfz,omitempty" yaml:"tfz,omitempty"`
 
-	TypicalP *float64 `json:"typical_p" yaml:"typical_p"`
-	Seed     *int     `json:"seed" yaml:"seed"`
+	TypicalP *float64 `json:"typical_p,omitempty" yaml:"typical_p,omitempty"`
+	Seed     *int     `json:"seed,omitempty" yaml:"seed,omitempty"`
 
-	NegativePrompt      string  `json:"negative_prompt" yaml:"negative_prompt"`
-	RopeFreqBase        float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
-	RopeFreqScale       float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
-	NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
+	NegativePrompt      string  `json:"negative_prompt,omitempty" yaml:"negative_prompt,omitempty"`
+	RopeFreqBase        float32 `json:"rope_freq_base,omitempty" yaml:"rope_freq_base,omitempty"`
+	RopeFreqScale       float32 `json:"rope_freq_scale,omitempty" yaml:"rope_freq_scale,omitempty"`
+	NegativePromptScale float32 `json:"negative_prompt_scale,omitempty" yaml:"negative_prompt_scale,omitempty"`
 
 	// Diffusers
-	ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
+	ClipSkip int `json:"clip_skip,omitempty" yaml:"clip_skip,omitempty"`
 
 	// RWKV (?)
-	Tokenizer string `json:"tokenizer" yaml:"tokenizer"`
+	Tokenizer string `json:"tokenizer,omitempty" yaml:"tokenizer,omitempty"`
 }
diff --git a/core/schema/request.go b/core/schema/request.go
index f55f39200ca4..8998a2736950 100644
--- a/core/schema/request.go
+++ b/core/schema/request.go
@@ -5,8 +5,9 @@ type LocalAIRequest interface {
 	ModelName(*string) string
 }
 
+// @Description BasicModelRequest contains the basic model request fields
 type BasicModelRequest struct {
-	Model string `json:"model" yaml:"model"`
+	Model string `json:"model,omitempty" yaml:"model,omitempty"`
 	// TODO: Should this also include the following fields from the OpenAI side of the world?
 	// If so, changes should be made to core/http/middleware/request.go to match
 
diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index 49c4970a7609..66a1e81ca8ab 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -13,99 +13,102 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+// @Description GrammarConfig contains configuration for grammar parsing
 type GrammarConfig struct {
 	// ParallelCalls enables the LLM to return multiple function calls in the same response
-	ParallelCalls bool `yaml:"parallel_calls"`
+	ParallelCalls bool `yaml:"parallel_calls,omitempty" json:"parallel_calls,omitempty"`
 
-	DisableParallelNewLines bool `yaml:"disable_parallel_new_lines"`
+	DisableParallelNewLines bool `yaml:"disable_parallel_new_lines,omitempty" json:"disable_parallel_new_lines,omitempty"`
 
 	// MixedMode enables the LLM to return strings and not only JSON objects
 	// This is useful for models to not constraining returning only JSON and also messages back to the user
-	MixedMode bool `yaml:"mixed_mode"`
+	MixedMode bool `yaml:"mixed_mode,omitempty" json:"mixed_mode,omitempty"`
 
 	// NoMixedFreeString disables the mixed mode for free strings
 	// In this way if the LLM selects a free string, it won't be mixed necessarily with JSON objects.
 	// For example, if enabled the LLM or returns a JSON object or a free string, but not a mix of both
 	// If disabled(default): the LLM can return a JSON object surrounded by free strings (e.g. `this is the JSON result: { "bar": "baz" } for your question`). This forces the LLM to return at least a JSON object, but its not going to be strict
-	NoMixedFreeString bool `yaml:"no_mixed_free_string"`
+	NoMixedFreeString bool `yaml:"no_mixed_free_string,omitempty" json:"no_mixed_free_string,omitempty"`
 
 	// NoGrammar disables the grammar parsing and parses the responses directly from the LLM
-	NoGrammar bool `yaml:"disable"`
+	NoGrammar bool `yaml:"disable,omitempty" json:"disable,omitempty"`
 
 	// Prefix is the suffix to append to the grammar when being generated
 	// This is useful when models prepend a tag before returning JSON
-	Prefix string `yaml:"prefix"`
+	Prefix string `yaml:"prefix,omitempty" json:"prefix,omitempty"`
 
 	// ExpectStringsAfterJSON enables mixed string suffix
-	ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
+	ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json,omitempty" json:"expect_strings_after_json,omitempty"`
 
 	// PropOrder selects what order to print properties
 	// for instance name,arguments will make print { "name": "foo", "arguments": { "bar": "baz" } }
 	// instead of { "arguments": { "bar": "baz" }, "name": "foo" }
-	PropOrder string `yaml:"properties_order"`
+	PropOrder string `yaml:"properties_order,omitempty" json:"properties_order,omitempty"`
 
 	// SchemaType can be configured to use a specific schema type to force the grammar
 	// available : json, llama3.1
-	SchemaType string `yaml:"schema_type"`
+	SchemaType string `yaml:"schema_type,omitempty" json:"schema_type,omitempty"`
 
-	GrammarTriggers []GrammarTrigger `yaml:"triggers"`
+	GrammarTriggers []GrammarTrigger `yaml:"triggers,omitempty" json:"triggers,omitempty"`
 }
 
+// @Description GrammarTrigger defines a trigger word for grammar parsing
 type GrammarTrigger struct {
 	// Trigger is the string that triggers the grammar
-	Word string `yaml:"word"`
+	Word string `yaml:"word,omitempty" json:"word,omitempty"`
 }
 
-// FunctionsConfig is the configuration for the tool/function call.
+// @Description FunctionsConfig is the configuration for the tool/function call.
 // It includes setting to map the function name and arguments from the response
 // and, for instance, also if processing the requests with BNF grammars.
 type FunctionsConfig struct {
 	// DisableNoAction disables the "no action" tool
 	// By default we inject a tool that does nothing and is used to return an answer from the LLM
-	DisableNoAction bool `yaml:"disable_no_action"`
+	DisableNoAction bool `yaml:"disable_no_action,omitempty" json:"disable_no_action,omitempty"`
 
 	// Grammar is the configuration for the grammar
-	GrammarConfig GrammarConfig `yaml:"grammar"`
+	GrammarConfig GrammarConfig `yaml:"grammar,omitempty" json:"grammar,omitempty"`
 
 	// NoActionFunctionName is the name of the function that does nothing. It defaults to "answer"
-	NoActionFunctionName string `yaml:"no_action_function_name"`
+	NoActionFunctionName string `yaml:"no_action_function_name,omitempty" json:"no_action_function_name,omitempty"`
 
 	// NoActionDescriptionName is the name of the function that returns the description of the no action function
-	NoActionDescriptionName string `yaml:"no_action_description_name"`
+	NoActionDescriptionName string `yaml:"no_action_description_name,omitempty" json:"no_action_description_name,omitempty"`
 
 	// ResponseRegex is a named regex to extract the function name and arguments from the response
-	ResponseRegex []string `yaml:"response_regex"`
+	ResponseRegex []string `yaml:"response_regex,omitempty" json:"response_regex,omitempty"`
 
 	// JSONRegexMatch is a regex to extract the JSON object from the response
-	JSONRegexMatch []string `yaml:"json_regex_match"`
+	JSONRegexMatch []string `yaml:"json_regex_match,omitempty" json:"json_regex_match,omitempty"`
 
 	// ArgumentRegex is a named regex to extract the arguments from the response. Use ArgumentRegexKey and ArgumentRegexValue to set the names of the named regex for key and value of the arguments.
-	ArgumentRegex []string `yaml:"argument_regex"`
+	ArgumentRegex []string `yaml:"argument_regex,omitempty" json:"argument_regex,omitempty"`
 	// ArgumentRegex named regex names for key and value extractions. default: key and value
-	ArgumentRegexKey   string `yaml:"argument_regex_key_name"`   // default: key
-	ArgumentRegexValue string `yaml:"argument_regex_value_name"` // default: value
+	ArgumentRegexKey   string `yaml:"argument_regex_key_name,omitempty" json:"argument_regex_key_name,omitempty"`   // default: key
+	ArgumentRegexValue string `yaml:"argument_regex_value_name,omitempty" json:"argument_regex_value_name,omitempty"` // default: value
 
 	// ReplaceFunctionResults allow to replace strings in the results before parsing them
-	ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results"`
+	ReplaceFunctionResults []ReplaceResult `yaml:"replace_function_results,omitempty" json:"replace_function_results,omitempty"`
 
 	// ReplaceLLMResult allow to replace strings in the results before parsing them
-	ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"`
+	ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results,omitempty" json:"replace_llm_results,omitempty"`
 
 	// CaptureLLMResult is a regex to extract a string from the LLM response
 	// that is used as return string when using tools.
 	// This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back
-	CaptureLLMResult []string `yaml:"capture_llm_results"`
+	CaptureLLMResult []string `yaml:"capture_llm_results,omitempty" json:"capture_llm_results,omitempty"`
 
 	// FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
 	// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
 	// This might be useful for certain models trained with the function name as the first token.
-	FunctionNameKey      string `yaml:"function_name_key"`
-	FunctionArgumentsKey string `yaml:"function_arguments_key"`
+	FunctionNameKey      string `yaml:"function_name_key,omitempty" json:"function_name_key,omitempty"`
+	FunctionArgumentsKey string `yaml:"function_arguments_key,omitempty" json:"function_arguments_key,omitempty"`
 }
 
+// @Description ReplaceResult defines a key-value replacement for function results
 type ReplaceResult struct {
-	Key   string `yaml:"key"`
-	Value string `yaml:"value"`
+	Key   string `yaml:"key,omitempty" json:"key,omitempty"`
+	Value string `yaml:"value,omitempty" json:"value,omitempty"`
 }
 
 type FuncCallResults struct {

From 2f4f8bd46da3bdeab804ea4ec39ae918dedf7d89 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 09:38:42 +0100
Subject: [PATCH 08/16] Fix tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 pkg/huggingface-api/client_test.go | 56 +++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/pkg/huggingface-api/client_test.go b/pkg/huggingface-api/client_test.go
index 37a381416125..ffa35bb90a94 100644
--- a/pkg/huggingface-api/client_test.go
+++ b/pkg/huggingface-api/client_test.go
@@ -270,6 +270,15 @@ var _ = Describe("HuggingFace API Client", func() {
 		})
 	})
 
+	Context("when getting file SHA on remote model", func() {
+		It("should get file SHA successfully", func() {
+			sha, err := client.GetFileSHA(
+				"mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF", "localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf")
+			Expect(err).ToNot(HaveOccurred())
+			Expect(sha).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"))
+		})
+	})
+
 	Context("when listing files", func() {
 		BeforeEach(func() {
 			mockFilesResponse := `[
@@ -329,23 +338,25 @@ var _ = Describe("HuggingFace API Client", func() {
 
 	Context("when getting file SHA", func() {
 		BeforeEach(func() {
-			mockFileInfoResponse := `{
-				"path": "model-Q4_K_M.gguf",
-				"size": 1000000,
-				"oid": "abc123",
-				"lfs": {
-					"oid": "sha256:def456",
+			mockFilesResponse := `[
+				{
+					"type": "file",
+					"path": "model-Q4_K_M.gguf",
 					"size": 1000000,
-					"pointer": "version https://git-lfs.github.com/spec/v1",
-					"sha256": "def456789"
+					"oid": "abc123",
+					"lfs": {
+						"oid": "def456789",
+						"size": 1000000,
+						"pointerSize": 135
+					}
 				}
-			}`
+			]`
 
 			server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				if strings.Contains(r.URL.Path, "/paths-info") {
+				if strings.Contains(r.URL.Path, "/tree/main") {
 					w.Header().Set("Content-Type", "application/json")
 					w.WriteHeader(http.StatusOK)
-					w.Write([]byte(mockFileInfoResponse))
+					w.Write([]byte(mockFilesResponse))
 				} else {
 					w.WriteHeader(http.StatusNotFound)
 				}
@@ -363,18 +374,29 @@ var _ = Describe("HuggingFace API Client", func() {
 
 		It("should handle missing SHA gracefully", func() {
 			server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-				w.Header().Set("Content-Type", "application/json")
-				w.WriteHeader(http.StatusOK)
-				w.Write([]byte(`{"path": "file.txt", "size": 100}`))
+				if strings.Contains(r.URL.Path, "/tree/main") {
+					w.Header().Set("Content-Type", "application/json")
+					w.WriteHeader(http.StatusOK)
+					w.Write([]byte(`[
+						{
+							"type": "file",
+							"path": "file.txt",
+							"size": 100,
+							"oid": "file123"
+						}
+					]`))
+				} else {
+					w.WriteHeader(http.StatusNotFound)
+				}
 			}))
 
 			client.SetBaseURL(server.URL)
 
 			sha, err := client.GetFileSHA("test/model", "file.txt")
 
-			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring("no SHA256 found"))
-			Expect(sha).To(Equal(""))
+			Expect(err).ToNot(HaveOccurred())
+			// When there's no LFS, it should return the OID
+			Expect(sha).To(Equal("file123"))
 		})
 	})
 

From 8937998c744dc752af88da850e0605e2055ef3bb Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 09:44:49 +0100
Subject: [PATCH 09/16] Add MLX importer

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/gallery/importers/importers.go |  1 +
 core/gallery/importers/mlx.go       | 94 +++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 core/gallery/importers/mlx.go

diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
index 5236af0cc245..532d8e68c9f5 100644
--- a/core/gallery/importers/importers.go
+++ b/core/gallery/importers/importers.go
@@ -7,6 +7,7 @@ import (
 
 var DefaultImporters = []Importer{
 	&LlamaCPPImporter{},
+	&MLXImporter{},
 }
 
 type Importer interface {
diff --git a/core/gallery/importers/mlx.go b/core/gallery/importers/mlx.go
new file mode 100644
index 000000000000..a8e183791873
--- /dev/null
+++ b/core/gallery/importers/mlx.go
@@ -0,0 +1,94 @@
+package importers
+
+import (
+	"encoding/json"
+	"path/filepath"
+	"strings"
+
+	"github.com/mudler/LocalAI/core/config"
+	"github.com/mudler/LocalAI/core/gallery"
+	"github.com/mudler/LocalAI/core/schema"
+	"go.yaml.in/yaml/v2"
+)
+
+var _ Importer = &MLXImporter{}
+
+type MLXImporter struct{}
+
+func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool {
+	preferences, err := request.Preferences.MarshalJSON()
+	if err != nil {
+		return false
+	}
+	preferencesMap := make(map[string]any)
+	err = json.Unmarshal(preferences, &preferencesMap)
+	if err != nil {
+		return false
+	}
+
+	b, ok := preferencesMap["backend"].(string)
+	if ok && b == "mlx" || b == "mlx-vlm" {
+		return true
+	}
+
+	// All https://huggingface.co/mlx-community/*
+	if strings.Contains(uri, "mlx-community/") {
+		return true
+	}
+
+	return false
+}
+
+func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
+	preferences, err := request.Preferences.MarshalJSON()
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+	preferencesMap := make(map[string]any)
+	err = json.Unmarshal(preferences, &preferencesMap)
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	name, ok := preferencesMap["name"].(string)
+	if !ok {
+		name = filepath.Base(uri)
+	}
+
+	description, ok := preferencesMap["description"].(string)
+	if !ok {
+		description = "Imported from " + uri
+	}
+
+	backend := "mlx"
+	b, ok := preferencesMap["backend"].(string)
+	if ok {
+		backend = b
+	}
+
+	modelConfig := config.ModelConfig{
+		Name:                name,
+		Description:         description,
+		KnownUsecaseStrings: []string{"chat"},
+		Backend:             backend,
+		PredictionOptions: schema.PredictionOptions{
+			BasicModelRequest: schema.BasicModelRequest{
+				Model: uri,
+			},
+		},
+		TemplateConfig: config.TemplateConfig{
+			UseTokenizerTemplate: true,
+		},
+	}
+
+	data, err := yaml.Marshal(modelConfig)
+	if err != nil {
+		return gallery.ModelConfig{}, err
+	}
+
+	return gallery.ModelConfig{
+		Name:        name,
+		Description: description,
+		ConfigFile:  string(data),
+	}, nil
+}

From dc44c57c80e91f6073807eac5b865578479064ec Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 12:53:02 +0100
Subject: [PATCH 10/16] Small refactors to start to use HF for discovery

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/gallery/importers/importers.go         | 47 +++++++++++++++++++--
 core/gallery/importers/llama-cpp.go         | 20 ++++-----
 core/gallery/importers/mlx.go               | 16 +++----
 core/http/endpoints/localai/import_model.go | 14 ++----
 core/startup/model_preload.go               | 25 +++++------
 5 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
index 532d8e68c9f5..f87bc5b81a8e 100644
--- a/core/gallery/importers/importers.go
+++ b/core/gallery/importers/importers.go
@@ -1,8 +1,12 @@
 package importers
 
 import (
+	"encoding/json"
+
+	"github.com/rs/zerolog/log"
+
 	"github.com/mudler/LocalAI/core/gallery"
-	"github.com/mudler/LocalAI/core/schema"
+	hfapi "github.com/mudler/LocalAI/pkg/huggingface-api"
 )
 
 var DefaultImporters = []Importer{
@@ -10,7 +14,44 @@ var DefaultImporters = []Importer{
 	&MLXImporter{},
 }
 
+type Details struct {
+	HuggingFace *hfapi.ModelDetails
+	URI         string
+	Preferences json.RawMessage
+}
+
 type Importer interface {
-	Match(uri string, request schema.ImportModelRequest) bool
-	Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error)
+	Match(details Details) bool
+	Import(details Details) (gallery.ModelConfig, error)
+}
+
+func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.ModelConfig, error) {
+	var err error
+	var modelConfig gallery.ModelConfig
+
+	hf := hfapi.NewClient()
+
+	hfDetails, err := hf.GetModelDetails(uri)
+	if err != nil {
+		// maybe not a HF repository
+		// TODO: maybe we can check if the URI is a valid HF repository
+		log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
+	}
+
+	details := Details{
+		HuggingFace: hfDetails,
+		URI:         uri,
+		Preferences: preferences,
+	}
+
+	for _, importer := range DefaultImporters {
+		if importer.Match(details) {
+			modelConfig, err = importer.Import(details)
+			if err != nil {
+				continue
+			}
+			break
+		}
+	}
+	return modelConfig, err
 }
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index 3596c868f50a..d84500fa19e6 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -16,8 +16,8 @@ var _ Importer = &LlamaCPPImporter{}
 
 type LlamaCPPImporter struct{}
 
-func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest) bool {
-	preferences, err := request.Preferences.MarshalJSON()
+func (i *LlamaCPPImporter) Match(details Details) bool {
+	preferences, err := details.Preferences.MarshalJSON()
 	if err != nil {
 		return false
 	}
@@ -31,11 +31,11 @@ func (i *LlamaCPPImporter) Match(uri string, request schema.ImportModelRequest)
 		return true
 	}
 
-	return strings.HasSuffix(uri, ".gguf")
+	return strings.HasSuffix(details.URI, ".gguf")
 }
 
-func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
-	preferences, err := request.Preferences.MarshalJSON()
+func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
+	preferences, err := details.Preferences.MarshalJSON()
 	if err != nil {
 		return gallery.ModelConfig{}, err
 	}
@@ -47,12 +47,12 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest)
 
 	name, ok := preferencesMap["name"].(string)
 	if !ok {
-		name = filepath.Base(uri)
+		name = filepath.Base(details.URI)
 	}
 
 	description, ok := preferencesMap["description"].(string)
 	if !ok {
-		description = "Imported from " + uri
+		description = "Imported from " + details.URI
 	}
 
 	modelConfig := config.ModelConfig{
@@ -62,7 +62,7 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest)
 		Backend:             "llama-cpp",
 		PredictionOptions: schema.PredictionOptions{
 			BasicModelRequest: schema.BasicModelRequest{
-				Model: filepath.Base(uri),
+				Model: filepath.Base(details.URI),
 			},
 		},
 		TemplateConfig: config.TemplateConfig{
@@ -86,8 +86,8 @@ func (i *LlamaCPPImporter) Import(uri string, request schema.ImportModelRequest)
 		ConfigFile:  string(data),
 		Files: []gallery.File{
 			{
-				URI:      uri,
-				Filename: filepath.Base(uri),
+				URI:      details.URI,
+				Filename: filepath.Base(details.URI),
 			},
 		},
 	}, nil
diff --git a/core/gallery/importers/mlx.go b/core/gallery/importers/mlx.go
index a8e183791873..faa28846f4ea 100644
--- a/core/gallery/importers/mlx.go
+++ b/core/gallery/importers/mlx.go
@@ -15,8 +15,8 @@ var _ Importer = &MLXImporter{}
 
 type MLXImporter struct{}
 
-func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool {
-	preferences, err := request.Preferences.MarshalJSON()
+func (i *MLXImporter) Match(details Details) bool {
+	preferences, err := details.Preferences.MarshalJSON()
 	if err != nil {
 		return false
 	}
@@ -32,15 +32,15 @@ func (i *MLXImporter) Match(uri string, request schema.ImportModelRequest) bool
 	}
 
 	// All https://huggingface.co/mlx-community/*
-	if strings.Contains(uri, "mlx-community/") {
+	if strings.Contains(details.URI, "mlx-community/") {
 		return true
 	}
 
 	return false
 }
 
-func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gallery.ModelConfig, error) {
-	preferences, err := request.Preferences.MarshalJSON()
+func (i *MLXImporter) Import(details Details) (gallery.ModelConfig, error) {
+	preferences, err := details.Preferences.MarshalJSON()
 	if err != nil {
 		return gallery.ModelConfig{}, err
 	}
@@ -52,12 +52,12 @@ func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gal
 
 	name, ok := preferencesMap["name"].(string)
 	if !ok {
-		name = filepath.Base(uri)
+		name = filepath.Base(details.URI)
 	}
 
 	description, ok := preferencesMap["description"].(string)
 	if !ok {
-		description = "Imported from " + uri
+		description = "Imported from " + details.URI
 	}
 
 	backend := "mlx"
@@ -73,7 +73,7 @@ func (i *MLXImporter) Import(uri string, request schema.ImportModelRequest) (gal
 		Backend:             backend,
 		PredictionOptions: schema.PredictionOptions{
 			BasicModelRequest: schema.BasicModelRequest{
-				Model: uri,
+				Model: details.URI,
 			},
 		},
 		TemplateConfig: config.TemplateConfig{
diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go
index ea71ae53ae5d..b6d34055390f 100644
--- a/core/http/endpoints/localai/import_model.go
+++ b/core/http/endpoints/localai/import_model.go
@@ -30,17 +30,9 @@ func ImportModelURIEndpoint(cl *config.ModelConfigLoader, appConfig *config.Appl
 			return err
 		}
 
-		var err error
-		var modelConfig gallery.ModelConfig
-
-		for _, importer := range importers.DefaultImporters {
-			if importer.Match(input.URI, *input) {
-				modelConfig, err = importer.Import(input.URI, *input)
-				if err != nil {
-					continue
-				}
-				break
-			}
+		modelConfig, err := importers.DiscoverModelConfig(input.URI, input.Preferences)
+		if err != nil {
+			return fmt.Errorf("failed to discover model config: %w", err)
 		}
 
 		uuid, err := uuid.NewUUID()
diff --git a/core/startup/model_preload.go b/core/startup/model_preload.go
index 4550a25e46f3..193aad6a2e52 100644
--- a/core/startup/model_preload.go
+++ b/core/startup/model_preload.go
@@ -1,6 +1,7 @@
 package startup
 
 import (
+	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
@@ -13,7 +14,6 @@ import (
 	"github.com/mudler/LocalAI/core/config"
 	"github.com/mudler/LocalAI/core/gallery"
 	"github.com/mudler/LocalAI/core/gallery/importers"
-	"github.com/mudler/LocalAI/core/schema"
 	"github.com/mudler/LocalAI/core/services"
 	"github.com/mudler/LocalAI/pkg/downloader"
 	"github.com/mudler/LocalAI/pkg/model"
@@ -165,22 +165,17 @@ func InstallModels(galleryService *services.GalleryService, galleries, backendGa
 						continue
 					}
 
-					// TODO: start autoimporter
-					var err error
-					var modelConfig gallery.ModelConfig
-					for _, importer := range importers.DefaultImporters {
-						if importer.Match(url, schema.ImportModelRequest{}) {
-							modelConfig, err = importer.Import(url, schema.ImportModelRequest{})
-							if err != nil {
-								continue
-							}
-							break
-						}
+					// TODO: we should just use the discoverModelConfig here and default to this.
+					modelConfig, discoverErr := importers.DiscoverModelConfig(url, json.RawMessage{})
+					if discoverErr != nil {
+						err = errors.Join(err, fmt.Errorf("failed to discover model config: %w", discoverErr)) // keep earlier failures, wrap the real cause
+						continue
+					}
 
-					uuid, err := uuid.NewUUID()
-					if err != nil {
-						return err
+					uuid, uuidErr := uuid.NewUUID()
+					if uuidErr != nil {
+						err = errors.Join(err, fmt.Errorf("failed to generate UUID: %w", uuidErr)) // append to accumulated errors instead of replacing them
+						continue
+					}
 
 					galleryService.ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{

From 71f2163459c932b521cdd1fc9eba24fceb6908ef Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 16:05:27 +0100
Subject: [PATCH 11/16] Add tests

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .../gallery/importers/importers_suite_test.go |  13 ++
 core/gallery/importers/llama-cpp_test.go      | 128 +++++++++++++++
 core/gallery/importers/mlx_test.go            | 147 ++++++++++++++++++
 3 files changed, 288 insertions(+)
 create mode 100644 core/gallery/importers/importers_suite_test.go
 create mode 100644 core/gallery/importers/llama-cpp_test.go
 create mode 100644 core/gallery/importers/mlx_test.go

diff --git a/core/gallery/importers/importers_suite_test.go b/core/gallery/importers/importers_suite_test.go
new file mode 100644
index 000000000000..a65b8163ad56
--- /dev/null
+++ b/core/gallery/importers/importers_suite_test.go
@@ -0,0 +1,13 @@
+package importers_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestImporters(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "Importers test suite")
+}
diff --git a/core/gallery/importers/llama-cpp_test.go b/core/gallery/importers/llama-cpp_test.go
new file mode 100644
index 000000000000..19f25e0a1dc6
--- /dev/null
+++ b/core/gallery/importers/llama-cpp_test.go
@@ -0,0 +1,128 @@
+package importers_test
+
+import (
+	"encoding/json"
+
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("LlamaCPPImporter", func() {
+	var importer *importers.LlamaCPPImporter
+
+	BeforeEach(func() {
+		importer = &importers.LlamaCPPImporter{}
+	})
+
+	Context("Match", func() {
+		It("should match when URI ends with .gguf", func() {
+			details := importers.Details{
+				URI: "https://example.com/model.gguf",
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeTrue())
+		})
+
+		It("should match when backend preference is llama-cpp", func() {
+			preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
+			details := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: preferences,
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeTrue())
+		})
+
+		It("should not match when URI does not end with .gguf and no backend preference", func() {
+			details := importers.Details{
+				URI: "https://example.com/model.bin",
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeFalse())
+		})
+
+		It("should not match when backend preference is different", func() {
+			preferences := json.RawMessage(`{"backend": "mlx"}`)
+			details := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: preferences,
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeFalse())
+		})
+
+		It("should return false when JSON preferences are invalid", func() {
+			preferences := json.RawMessage(`invalid json`)
+			details := importers.Details{
+				URI:         "https://example.com/model.gguf",
+				Preferences: preferences,
+			}
+
+			// Invalid JSON causes Match to return false early
+			result := importer.Match(details)
+			Expect(result).To(BeFalse())
+		})
+	})
+
+	Context("Import", func() {
+		It("should import model config with default name and description", func() {
+			details := importers.Details{
+				URI: "https://example.com/my-model.gguf",
+			}
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("my-model.gguf"))
+			Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
+			Expect(modelConfig.Files).To(HaveLen(1))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"))
+			Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: my-model.gguf"))
+		})
+
+		It("should import model config with custom name and description from preferences", func() {
+			preferences := json.RawMessage(`{"name": "custom-model", "description": "Custom description"}`)
+			details := importers.Details{
+				URI:         "https://example.com/my-model.gguf",
+				Preferences: preferences,
+			}
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("custom-model"))
+			Expect(modelConfig.Description).To(Equal("Custom description"))
+			Expect(modelConfig.Files).To(HaveLen(1))
+		})
+
+		It("should handle invalid JSON preferences", func() {
+			preferences := json.RawMessage(`invalid json`)
+			details := importers.Details{
+				URI:         "https://example.com/my-model.gguf",
+				Preferences: preferences,
+			}
+
+			_, err := importer.Import(details)
+			Expect(err).To(HaveOccurred())
+		})
+
+		It("should extract filename correctly from URI with path", func() {
+			details := importers.Details{
+				URI: "https://example.com/path/to/model.gguf",
+			}
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: model.gguf"))
+		})
+	})
+})
diff --git a/core/gallery/importers/mlx_test.go b/core/gallery/importers/mlx_test.go
new file mode 100644
index 000000000000..82e02aff0b44
--- /dev/null
+++ b/core/gallery/importers/mlx_test.go
@@ -0,0 +1,147 @@
+package importers_test
+
+import (
+	"encoding/json"
+
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("MLXImporter", func() {
+	var importer *importers.MLXImporter
+
+	BeforeEach(func() {
+		importer = &importers.MLXImporter{}
+	})
+
+	Context("Match", func() {
+		It("should match when URI contains mlx-community/", func() {
+			details := importers.Details{
+				URI: "https://huggingface.co/mlx-community/test-model",
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeTrue())
+		})
+
+		It("should match when backend preference is mlx", func() {
+			preferences := json.RawMessage(`{"backend": "mlx"}`)
+			details := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: preferences,
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeTrue())
+		})
+
+		It("should match when backend preference is mlx-vlm", func() {
+			preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
+			details := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: preferences,
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeTrue())
+		})
+
+		It("should not match when URI does not contain mlx-community/ and no backend preference", func() {
+			details := importers.Details{
+				URI: "https://huggingface.co/other-org/test-model",
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeFalse())
+		})
+
+		It("should not match when backend preference is different", func() {
+			preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
+			details := importers.Details{
+				URI:         "https://example.com/model",
+				Preferences: preferences,
+			}
+
+			result := importer.Match(details)
+			Expect(result).To(BeFalse())
+		})
+
+		It("should return false when JSON preferences are invalid", func() {
+			preferences := json.RawMessage(`invalid json`)
+			details := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: preferences,
+			}
+
+			// Invalid JSON causes Match to return false early
+			result := importer.Match(details)
+			Expect(result).To(BeFalse())
+		})
+	})
+
+	Context("Import", func() {
+		It("should import model config with default name and description", func() {
+			details := importers.Details{
+				URI: "https://huggingface.co/mlx-community/test-model",
+			}
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: https://huggingface.co/mlx-community/test-model"))
+		})
+
+		It("should import model config with custom name and description from preferences", func() {
+			preferences := json.RawMessage(`{"name": "custom-mlx-model", "description": "Custom MLX description"}`)
+			details := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: preferences,
+			}
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("custom-mlx-model"))
+			Expect(modelConfig.Description).To(Equal("Custom MLX description"))
+		})
+
+		It("should use custom backend from preferences", func() {
+			preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
+			details := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: preferences,
+			}
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
+		})
+
+		It("should handle invalid JSON preferences", func() {
+			preferences := json.RawMessage(`invalid json`)
+			details := importers.Details{
+				URI:         "https://huggingface.co/mlx-community/test-model",
+				Preferences: preferences,
+			}
+
+			_, err := importer.Import(details)
+			Expect(err).To(HaveOccurred())
+		})
+
+		It("should extract filename correctly from URI with path", func() {
+			details := importers.Details{
+				URI: "https://huggingface.co/mlx-community/path/to/model",
+			}
+
+			modelConfig, err := importer.Import(details)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("model"))
+		})
+	})
+})

From b8ed4467f0cb6f43338dbb594fea27a781f32bb8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 16:13:06 +0100
Subject: [PATCH 12/16] Common preferences

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/views/model-editor.html | 151 +++++++++++++++++++++++-------
 1 file changed, 118 insertions(+), 33 deletions(-)

diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index f8db7cf66fae..68b26360c1b5 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -109,40 +109,107 @@ <h2 class="text-2xl font-semibold text-white flex items-center gap-3 mb-6">
                         <label class="block text-sm font-medium text-gray-300">
                             <i class="fas fa-cog mr-2"></i>Preferences (Optional)
                         </label>
-                        <button @click="addPreference()" 
-                                :disabled="isSubmitting"
-                                class="text-sm px-3 py-1.5 rounded-lg bg-green-600/20 hover:bg-green-600/30 text-green-300 border border-green-500/30 transition-all">
-                            <i class="fas fa-plus mr-1"></i>Add Preference
-                        </button>
                     </div>
                     
-                    <div class="space-y-3" x-show="preferences.length > 0">
-                        <template x-for="(pref, index) in preferences" :key="index">
-                            <div class="flex gap-3 items-center">
-                                <input 
-                                    x-model="pref.key"
-                                    type="text" 
-                                    placeholder="Key"
-                                    class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
-                                    :disabled="isSubmitting">
-                                <span class="text-gray-400">:</span>
-                                <input 
-                                    x-model="pref.value"
-                                    type="text" 
-                                    placeholder="Value"
-                                    class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
-                                    :disabled="isSubmitting">
-                                <button @click="removePreference(index)" 
-                                        :disabled="isSubmitting"
-                                        class="px-3 py-2 rounded-lg bg-red-600/20 hover:bg-red-600/30 text-red-300 border border-red-500/30 transition-all">
-                                    <i class="fas fa-trash"></i>
-                                </button>
-                            </div>
-                        </template>
+                    <!-- Common Preferences -->
+                    <div class="space-y-4 mb-6 p-4 bg-gray-900/50 rounded-xl border border-gray-700/50">
+                        <h3 class="text-sm font-semibold text-gray-300 mb-3 flex items-center">
+                            <i class="fas fa-star mr-2 text-yellow-400"></i>Common Preferences
+                        </h3>
+                        
+                        <!-- Backend Selection -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-server mr-2"></i>Backend
+                            </label>
+                            <select 
+                                x-model="commonPreferences.backend"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                                <option value="">Auto-detect (based on URI)</option>
+                                <option value="llama-cpp">llama-cpp</option>
+                                <option value="mlx">mlx</option>
+                                <option value="mlx-vlm">mlx-vlm</option>
+                            </select>
+                            <p class="mt-1 text-xs text-gray-400">
+                                Force a specific backend. Leave empty to auto-detect from URI.
+                            </p>
+                        </div>
+                        
+                        <!-- Model Name -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-tag mr-2"></i>Model Name
+                            </label>
+                            <input 
+                                x-model="commonPreferences.name"
+                                type="text" 
+                                placeholder="Leave empty to use filename"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                            <p class="mt-1 text-xs text-gray-400">
+                                Custom name for the model. If empty, the filename will be used.
+                            </p>
+                        </div>
+                        
+                        <!-- Description -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-align-left mr-2"></i>Description
+                            </label>
+                            <textarea 
+                                x-model="commonPreferences.description"
+                                rows="3"
+                                placeholder="Leave empty to use default description"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all resize-none"
+                                :disabled="isSubmitting"></textarea>
+                            <p class="mt-1 text-xs text-gray-400">
+                                Custom description for the model. If empty, a default description will be generated.
+                            </p>
+                        </div>
+                    </div>
+                    
+                    <!-- Custom Preferences -->
+                    <div class="space-y-3">
+                        <div class="flex items-center justify-between mb-3">
+                            <label class="block text-sm font-medium text-gray-300">
+                                <i class="fas fa-sliders-h mr-2"></i>Custom Preferences
+                            </label>
+                            <button @click="addPreference()" 
+                                    :disabled="isSubmitting"
+                                    class="text-sm px-3 py-1.5 rounded-lg bg-green-600/20 hover:bg-green-600/30 text-green-300 border border-green-500/30 transition-all">
+                                <i class="fas fa-plus mr-1"></i>Add Custom
+                            </button>
+                        </div>
+                        
+                        <div class="space-y-3" x-show="preferences.length > 0">
+                            <template x-for="(pref, index) in preferences" :key="index">
+                                <div class="flex gap-3 items-center">
+                                    <input 
+                                        x-model="pref.key"
+                                        type="text" 
+                                        placeholder="Key"
+                                        class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                        :disabled="isSubmitting">
+                                    <span class="text-gray-400">:</span>
+                                    <input 
+                                        x-model="pref.value"
+                                        type="text" 
+                                        placeholder="Value"
+                                        class="flex-1 px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                        :disabled="isSubmitting">
+                                    <button @click="removePreference(index)" 
+                                            :disabled="isSubmitting"
+                                            class="px-3 py-2 rounded-lg bg-red-600/20 hover:bg-red-600/30 text-red-300 border border-red-500/30 transition-all">
+                                        <i class="fas fa-trash"></i>
+                                    </button>
+                                </div>
+                            </template>
+                        </div>
+                        <p class="mt-2 text-xs text-gray-400">
+                            Add custom key-value pairs for advanced configuration
+                        </p>
                     </div>
-                    <p class="mt-2 text-xs text-gray-400">
-                        Add key-value pairs to customize the import process
-                    </p>
                 </div>
             </div>
         </div>
@@ -354,6 +421,11 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
         isEditMode: {{if .ModelName}}true{{else}}false{{end}},
         importUri: '',
         preferences: [],
+        commonPreferences: {
+            backend: '',
+            name: '',
+            description: ''
+        },
         isSubmitting: false,
         currentJobId: null,
         jobPollInterval: null,
@@ -402,11 +474,24 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
             this.isSubmitting = true;
             
             try {
-                // Build preferences object
+                // Build preferences object starting with common preferences
                 const prefsObj = {};
+                
+                // Add common preferences (only non-empty values)
+                if (this.commonPreferences.backend && this.commonPreferences.backend.trim()) {
+                    prefsObj.backend = this.commonPreferences.backend.trim();
+                }
+                if (this.commonPreferences.name && this.commonPreferences.name.trim()) {
+                    prefsObj.name = this.commonPreferences.name.trim();
+                }
+                if (this.commonPreferences.description && this.commonPreferences.description.trim()) {
+                    prefsObj.description = this.commonPreferences.description.trim();
+                }
+                
+                // Add custom preferences (can override common ones)
                 this.preferences.forEach(pref => {
                     if (pref.key && pref.value) {
-                        prefsObj[pref.key] = pref.value;
+                        prefsObj[pref.key.trim()] = pref.value.trim();
                     }
                 });
                 

From 9cd627362efb0695d22fbe6635b26d50c970768c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 17:41:04 +0100
Subject: [PATCH 13/16] Add support to bare HF repos

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/gallery/importers/importers.go      |  10 +-
 core/gallery/importers/importers_test.go | 173 +++++++++++++++++++++++
 core/gallery/importers/llama-cpp.go      |  76 +++++++---
 core/gallery/importers/llama-cpp_test.go |  17 ++-
 core/http/views/model-editor.html        |  22 ++-
 pkg/huggingface-api/client.go            |   7 +
 pkg/huggingface-api/client_test.go       |   8 ++
 7 files changed, 287 insertions(+), 26 deletions(-)
 create mode 100644 core/gallery/importers/importers_test.go

diff --git a/core/gallery/importers/importers.go b/core/gallery/importers/importers.go
index f87bc5b81a8e..0be81ed06d0d 100644
--- a/core/gallery/importers/importers.go
+++ b/core/gallery/importers/importers.go
@@ -2,6 +2,7 @@ package importers
 
 import (
 	"encoding/json"
+	"strings"
 
 	"github.com/rs/zerolog/log"
 
@@ -31,11 +32,18 @@ func DiscoverModelConfig(uri string, preferences json.RawMessage) (gallery.Model
 
 	hf := hfapi.NewClient()
 
-	hfDetails, err := hf.GetModelDetails(uri)
+	hfrepoID := strings.TrimPrefix(uri, "huggingface://") // strip only a leading scheme marker, never a mid-string match
+	hfrepoID = strings.TrimPrefix(hfrepoID, "hf://")
+	hfrepoID = strings.TrimPrefix(hfrepoID, "https://huggingface.co/")
+
+	hfDetails, err := hf.GetModelDetails(hfrepoID)
 	if err != nil {
 		// maybe not a HF repository
 		// TODO: maybe we can check if the URI is a valid HF repository
 		log.Debug().Str("uri", uri).Msg("Failed to get model details, maybe not a HF repository")
+	} else {
+		log.Debug().Str("uri", uri).Msg("Got model details")
+		log.Debug().Any("details", hfDetails).Msg("Model details")
 	}
 
 	details := Details{
diff --git a/core/gallery/importers/importers_test.go b/core/gallery/importers/importers_test.go
new file mode 100644
index 000000000000..48fb6ac3d46a
--- /dev/null
+++ b/core/gallery/importers/importers_test.go
@@ -0,0 +1,173 @@
+package importers_test
+
+import (
+	"encoding/json"
+	"fmt"
+
+	"github.com/mudler/LocalAI/core/gallery/importers"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("DiscoverModelConfig", func() {
+
+	Context("With only a repository URI", func() {
+		It("should discover and import using LlamaCPPImporter", func() {
+			uri := "https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error: %v", err))
+			Expect(modelConfig.Name).To(Equal("LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].Filename).To(Equal("localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://huggingface.co/mudler/LocalAI-functioncall-qwen2.5-7b-v0.5-Q4_K_M-GGUF/resolve/main/localai-functioncall-qwen2.5-7b-v0.5-q4_k_m.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].SHA256).To(Equal("4e7b7fe1d54b881f1ef90799219dc6cc285d29db24f559c8998d1addb35713d4"), fmt.Sprintf("Model config: %+v", modelConfig))
+		})
+	})
+
+	Context("with .gguf URI", func() {
+		It("should discover and import using LlamaCPPImporter", func() {
+			uri := "https://example.com/my-model.gguf"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("my-model.gguf"))
+			Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+		})
+
+		It("should use custom preferences when provided", func() {
+			uri := "https://example.com/my-model.gguf"
+			preferences := json.RawMessage(`{"name": "custom-name", "description": "Custom description"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("custom-name"))
+			Expect(modelConfig.Description).To(Equal("Custom description"))
+		})
+	})
+
+	Context("with mlx-community URI", func() {
+		It("should discover and import using MLXImporter", func() {
+			uri := "https://huggingface.co/mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+			Expect(modelConfig.Description).To(Equal("Imported from https://huggingface.co/mlx-community/test-model"))
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+		})
+
+		It("should use custom preferences when provided", func() {
+			uri := "https://huggingface.co/mlx-community/test-model"
+			preferences := json.RawMessage(`{"name": "custom-mlx", "description": "Custom MLX description"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("custom-mlx"))
+			Expect(modelConfig.Description).To(Equal("Custom MLX description"))
+		})
+	})
+
+	Context("with backend preference", func() {
+		It("should use llama-cpp backend when specified", func() {
+			uri := "https://example.com/model"
+			preferences := json.RawMessage(`{"backend": "llama-cpp"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
+		})
+
+		It("should use mlx backend when specified", func() {
+			uri := "https://example.com/model"
+			preferences := json.RawMessage(`{"backend": "mlx"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx"))
+		})
+
+		It("should use mlx-vlm backend when specified", func() {
+			uri := "https://example.com/model"
+			preferences := json.RawMessage(`{"backend": "mlx-vlm"}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: mlx-vlm"))
+		})
+	})
+
+	Context("with HuggingFace URI formats", func() {
+		It("should handle huggingface:// prefix", func() {
+			uri := "huggingface://mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+		})
+
+		It("should handle hf:// prefix", func() {
+			uri := "hf://mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+		})
+
+		It("should handle https://huggingface.co/ prefix", func() {
+			uri := "https://huggingface.co/mlx-community/test-model"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelConfig.Name).To(Equal("test-model"))
+		})
+	})
+
+	Context("with invalid or non-matching URI", func() {
+		It("should return error when no importer matches", func() {
+			uri := "https://example.com/unknown-model.bin"
+			preferences := json.RawMessage(`{}`)
+
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			// When no importer matches, DiscoverModelConfig is expected to return
+			// an empty model config together with a non-nil error.
+			Expect(modelConfig.Name).To(BeEmpty())
+			Expect(err).To(HaveOccurred())
+		})
+	})
+
+	Context("with invalid JSON preferences", func() {
+		It("should return error when JSON is invalid even if URI matches", func() {
+			uri := "https://example.com/model.gguf"
+			preferences := json.RawMessage(`invalid json`)
+
+			// Even though Match() returns true for .gguf extension,
+			// Import() will fail when trying to unmarshal invalid JSON preferences
+			modelConfig, err := importers.DiscoverModelConfig(uri, preferences)
+
+			Expect(err).To(HaveOccurred())
+			Expect(modelConfig.Name).To(BeEmpty())
+		})
+	})
+})
diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index d84500fa19e6..d4c8ff591fdb 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -3,6 +3,7 @@ package importers
 import (
 	"encoding/json"
 	"path/filepath"
+	"slices"
 	"strings"
 
 	"github.com/mudler/LocalAI/core/config"
@@ -31,7 +32,19 @@ func (i *LlamaCPPImporter) Match(details Details) bool {
 		return true
 	}
 
-	return strings.HasSuffix(details.URI, ".gguf")
+	if strings.HasSuffix(details.URI, ".gguf") {
+		return true
+	}
+
+	if details.HuggingFace != nil {
+		for _, file := range details.HuggingFace.Files {
+			if strings.HasSuffix(file.Path, ".gguf") {
+				return true
+			}
+		}
+	}
+
+	return false
 }
 
 func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error) {
@@ -55,16 +68,17 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
 		description = "Imported from " + details.URI
 	}
 
+	preferedQuantizations, _ := preferencesMap["quantizations"].(string) // comma-separated list, e.g. "q4_k_m, q4_k_s"
+	quants := strings.FieldsFunc(preferedQuantizations, func(r rune) bool { return r == ',' || r == ' ' || r == '\t' })
+	if len(quants) == 0 { // nothing usable requested (an empty entry would match every file): use the default
+		quants = []string{"q4_k_m"}
+	}
+
 	modelConfig := config.ModelConfig{
 		Name:                name,
 		Description:         description,
 		KnownUsecaseStrings: []string{"chat"},
 		Backend:             "llama-cpp",
-		PredictionOptions: schema.PredictionOptions{
-			BasicModelRequest: schema.BasicModelRequest{
-				Model: filepath.Base(details.URI),
-			},
-		},
 		TemplateConfig: config.TemplateConfig{
 			UseTokenizerTemplate: true,
 		},
@@ -75,20 +89,48 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
 		},
 	}
 
+	cfg := gallery.ModelConfig{
+		Name:        name,
+		Description: description,
+	}
+
+	if strings.Contains(details.URI, ".gguf") {
+		cfg.Files = append(cfg.Files, gallery.File{
+			URI:      details.URI,
+			Filename: filepath.Base(details.URI),
+		})
+		modelConfig.PredictionOptions = schema.PredictionOptions{
+			BasicModelRequest: schema.BasicModelRequest{
+				Model: filepath.Base(details.URI),
+			},
+		}
+	} else if details.HuggingFace != nil {
+		for _, file := range details.HuggingFace.Files {
+			if slices.ContainsFunc(quants, func(quant string) bool {
+				return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
+			}) {
+				cfg.Files = append(cfg.Files, gallery.File{
+					URI:      file.URL,
+					Filename: filepath.Base(file.Path),
+					SHA256:   file.SHA256,
+				})
+			}
+		}
+		if len(cfg.Files) > 0 { // the loop above appends matches to cfg.Files, not modelConfig.DownloadFiles
+			modelConfig.PredictionOptions = schema.PredictionOptions{
+				BasicModelRequest: schema.BasicModelRequest{
+					Model: cfg.Files[0].Filename,
+				},
+			}
+		}
+	}
+
 	data, err := yaml.Marshal(modelConfig)
 	if err != nil {
 		return gallery.ModelConfig{}, err
 	}
 
-	return gallery.ModelConfig{
-		Name:        name,
-		Description: description,
-		ConfigFile:  string(data),
-		Files: []gallery.File{
-			{
-				URI:      details.URI,
-				Filename: filepath.Base(details.URI),
-			},
-		},
-	}, nil
+	cfg.ConfigFile = string(data)
+
+	return cfg, nil
 }
diff --git a/core/gallery/importers/llama-cpp_test.go b/core/gallery/importers/llama-cpp_test.go
index 19f25e0a1dc6..5a3c4d74f44d 100644
--- a/core/gallery/importers/llama-cpp_test.go
+++ b/core/gallery/importers/llama-cpp_test.go
@@ -2,6 +2,7 @@ package importers_test
 
 import (
 	"encoding/json"
+	"fmt"
 
 	"github.com/mudler/LocalAI/core/gallery/importers"
 	. "github.com/onsi/ginkgo/v2"
@@ -80,11 +81,10 @@ var _ = Describe("LlamaCPPImporter", func() {
 			Expect(err).ToNot(HaveOccurred())
 			Expect(modelConfig.Name).To(Equal("my-model.gguf"))
 			Expect(modelConfig.Description).To(Equal("Imported from https://example.com/my-model.gguf"))
-			Expect(modelConfig.Files).To(HaveLen(1))
-			Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"))
-			Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"))
 			Expect(modelConfig.ConfigFile).To(ContainSubstring("backend: llama-cpp"))
-			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: my-model.gguf"))
+			Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
 		})
 
 		It("should import model config with custom name and description from preferences", func() {
@@ -99,7 +99,9 @@ var _ = Describe("LlamaCPPImporter", func() {
 			Expect(err).ToNot(HaveOccurred())
 			Expect(modelConfig.Name).To(Equal("custom-model"))
 			Expect(modelConfig.Description).To(Equal("Custom description"))
-			Expect(modelConfig.Files).To(HaveLen(1))
+			Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].Filename).To(Equal("my-model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
 		})
 
 		It("should handle invalid JSON preferences", func() {
@@ -121,8 +123,9 @@ var _ = Describe("LlamaCPPImporter", func() {
 			modelConfig, err := importer.Import(details)
 
 			Expect(err).ToNot(HaveOccurred())
-			Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"))
-			Expect(modelConfig.ConfigFile).To(ContainSubstring("model: model.gguf"))
+			Expect(len(modelConfig.Files)).To(Equal(1), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].URI).To(Equal("https://example.com/path/to/model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
+			Expect(modelConfig.Files[0].Filename).To(Equal("model.gguf"), fmt.Sprintf("Model config: %+v", modelConfig))
 		})
 	})
 })
diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index 68b26360c1b5..d18aa6cdb924 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -167,6 +167,22 @@ <h3 class="text-sm font-semibold text-gray-300 mb-3 flex items-center">
                                 Custom description for the model. If empty, a default description will be generated.
                             </p>
                         </div>
+                        
+                        <!-- Quantizations -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-layer-group mr-2"></i>Quantizations
+                            </label>
+                            <input 
+                                x-model="commonPreferences.quantizations"
+                                type="text" 
+                                placeholder="q4_k_m,q4_k_s,q3_k_m (comma-separated)"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                            <p class="mt-1 text-xs text-gray-400">
+                                Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).
+                            </p>
+                        </div>
                     </div>
                     
                     <!-- Custom Preferences -->
@@ -424,7 +440,8 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
         commonPreferences: {
             backend: '',
             name: '',
-            description: ''
+            description: '',
+            quantizations: ''
         },
         isSubmitting: false,
         currentJobId: null,
@@ -487,6 +504,9 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
                 if (this.commonPreferences.description && this.commonPreferences.description.trim()) {
                     prefsObj.description = this.commonPreferences.description.trim();
                 }
+                if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) {
+                    prefsObj.quantizations = this.commonPreferences.quantizations.trim();
+                }
                 
                 // Add custom preferences (can override common ones)
                 this.preferences.forEach(pref => {
diff --git a/pkg/huggingface-api/client.go b/pkg/huggingface-api/client.go
index 37fb52bb22c1..9b1959f1d857 100644
--- a/pkg/huggingface-api/client.go
+++ b/pkg/huggingface-api/client.go
@@ -51,6 +51,7 @@ type ModelFile struct {
 	Size     int64
 	SHA256   string
 	IsReadme bool
+	URL      string
 }
 
 // ModelDetails represents detailed information about a model
@@ -215,6 +216,7 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
 	}
 
 	// Process each file
+	baseURL := strings.TrimSuffix(c.baseURL, "/api/models")
 	for _, file := range files {
 		fileName := filepath.Base(file.Path)
 		isReadme := strings.Contains(strings.ToLower(fileName), "readme")
@@ -227,11 +229,16 @@ func (c *Client) GetModelDetails(repoID string) (*ModelDetails, error) {
 			sha256 = file.Oid
 		}
 
+		// Construct the full URL for the file
+		// Use /resolve/main/ for downloading files (handles LFS properly)
+		fileURL := fmt.Sprintf("%s/%s/resolve/main/%s", baseURL, repoID, file.Path)
+
 		modelFile := ModelFile{
 			Path:     file.Path,
 			Size:     file.Size,
 			SHA256:   sha256,
 			IsReadme: isReadme,
+			URL:      fileURL,
 		}
 
 		details.Files = append(details.Files, modelFile)
diff --git a/pkg/huggingface-api/client_test.go b/pkg/huggingface-api/client_test.go
index ffa35bb90a94..5ef5f7900930 100644
--- a/pkg/huggingface-api/client_test.go
+++ b/pkg/huggingface-api/client_test.go
@@ -1,6 +1,7 @@
 package hfapi_test
 
 import (
+	"fmt"
 	"net/http"
 	"net/http/httptest"
 	"strings"
@@ -461,6 +462,13 @@ var _ = Describe("HuggingFace API Client", func() {
 			Expect(details.ReadmeFile).ToNot(BeNil())
 			Expect(details.ReadmeFile.Path).To(Equal("README.md"))
 			Expect(details.ReadmeFile.IsReadme).To(BeTrue())
+
+			// Verify URLs are set for all files
+			baseURL := strings.TrimSuffix(server.URL, "/api/models")
+			for _, file := range details.Files {
+				expectedURL := fmt.Sprintf("%s/test/model/resolve/main/%s", baseURL, file.Path)
+				Expect(file.URL).To(Equal(expectedURL))
+			}
 		})
 	})
 

From c9f30fbc0f6e152e4c211fc5addb9174069e1046 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 18:26:43 +0100
Subject: [PATCH 14/16] feat(importer/llama.cpp): add support for mmproj files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/gallery/importers/llama-cpp.go | 55 +++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/core/gallery/importers/llama-cpp.go b/core/gallery/importers/llama-cpp.go
index d4c8ff591fdb..65bebf7d1966 100644
--- a/core/gallery/importers/llama-cpp.go
+++ b/core/gallery/importers/llama-cpp.go
@@ -69,11 +69,17 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
 	}
 
 	preferedQuantizations, _ := preferencesMap["quantizations"].(string)
-	quants := []string{"q4_k_m"}
+	quants := []string{"q4_k_m", "q4_0", "q8_0", "f16"}
 	if preferedQuantizations != "" {
 		quants = strings.Split(preferedQuantizations, ",")
 	}
 
+	mmprojQuants, _ := preferencesMap["mmproj_quantizations"].(string)
+	mmprojQuantsList := []string{"fp16"}
+	if mmprojQuants != "" {
+		mmprojQuantsList = strings.Split(mmprojQuants, ",")
+	}
+
 	modelConfig := config.ModelConfig{
 		Name:                name,
 		Description:         description,
@@ -105,7 +111,11 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
 			},
 		}
 	} else if details.HuggingFace != nil {
+		lastMMProjFile := gallery.File{}
+		foundPreferedQuant := false
+
 		for _, file := range details.HuggingFace.Files {
+			// Collect the files whose path matches one of the preferred quants
 			if slices.ContainsFunc(quants, func(quant string) bool {
 				return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
 			}) {
@@ -115,14 +125,53 @@ func (i *LlamaCPPImporter) Import(details Details) (gallery.ModelConfig, error)
 					SHA256:   file.SHA256,
 				})
 			}
+			// Track mmproj files, adding those that match a preferred mmproj quant
+			if strings.Contains(strings.ToLower(file.Path), "mmproj") {
+				lastMMProjFile = gallery.File{
+					URI:      file.URL,
+					Filename: filepath.Base(file.Path),
+					SHA256:   file.SHA256,
+				}
+				if slices.ContainsFunc(mmprojQuantsList, func(quant string) bool {
+					return strings.Contains(strings.ToLower(file.Path), strings.ToLower(quant))
+				}) {
+					foundPreferedQuant = true
+					cfg.Files = append(cfg.Files, lastMMProjFile)
+				}
+			}
 		}
-		if len(modelConfig.DownloadFiles) > 0 {
+
+		if !foundPreferedQuant && lastMMProjFile.URI != "" {
+			cfg.Files = append(cfg.Files, lastMMProjFile)
 			modelConfig.PredictionOptions = schema.PredictionOptions{
 				BasicModelRequest: schema.BasicModelRequest{
-					Model: modelConfig.DownloadFiles[0].Filename,
+					Model: lastMMProjFile.Filename,
 				},
 			}
 		}
+
+		// Find first mmproj file
+		for _, file := range cfg.Files {
+			if !strings.Contains(strings.ToLower(file.Filename), "mmproj") {
+				continue
+			}
+			modelConfig.MMProj = file.Filename
+			break
+		}
+
+		// Find first non-mmproj file
+		for _, file := range cfg.Files {
+			if strings.Contains(strings.ToLower(file.Filename), "mmproj") {
+				continue
+			}
+			modelConfig.PredictionOptions = schema.PredictionOptions{
+				BasicModelRequest: schema.BasicModelRequest{
+					Model: file.Filename,
+				},
+			}
+			break
+		}
+
 	}
 
 	data, err := yaml.Marshal(modelConfig)

From 0d02850f3ddbec347992a09267536652ad6535a3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 18:33:01 +0100
Subject: [PATCH 15/16] add mmproj quants to common preferences

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 core/http/views/model-editor.html | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/core/http/views/model-editor.html b/core/http/views/model-editor.html
index d18aa6cdb924..d5b5fce764d7 100644
--- a/core/http/views/model-editor.html
+++ b/core/http/views/model-editor.html
@@ -183,6 +183,22 @@ <h3 class="text-sm font-semibold text-gray-300 mb-3 flex items-center">
                                 Preferred quantizations (comma-separated). Examples: q4_k_m, q4_k_s, q3_k_m, q2_k. Leave empty to use default (q4_k_m).
                             </p>
                         </div>
+                        
+                        <!-- MMProj Quantizations -->
+                        <div>
+                            <label class="block text-sm font-medium text-gray-300 mb-2">
+                                <i class="fas fa-image mr-2"></i>MMProj Quantizations
+                            </label>
+                            <input 
+                                x-model="commonPreferences.mmproj_quantizations"
+                                type="text" 
+                                placeholder="fp16,fp32 (comma-separated)"
+                                class="w-full px-4 py-2 bg-gray-900/90 border border-gray-700/70 rounded-lg text-gray-200 focus:border-green-500 focus:ring-2 focus:ring-green-500/50 focus:outline-none transition-all"
+                                :disabled="isSubmitting">
+                            <p class="mt-1 text-xs text-gray-400">
+                                Preferred MMProj quantizations (comma-separated). Examples: fp16, fp32. Leave empty to use default (fp16).
+                            </p>
+                        </div>
                     </div>
                     
                     <!-- Custom Preferences -->
@@ -441,7 +457,8 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
             backend: '',
             name: '',
             description: '',
-            quantizations: ''
+            quantizations: '',
+            mmproj_quantizations: ''
         },
         isSubmitting: false,
         currentJobId: null,
@@ -507,6 +524,9 @@ <h2 class="text-xl font-semibold text-white flex items-center gap-3">
                 if (this.commonPreferences.quantizations && this.commonPreferences.quantizations.trim()) {
                     prefsObj.quantizations = this.commonPreferences.quantizations.trim();
                 }
+                if (this.commonPreferences.mmproj_quantizations && this.commonPreferences.mmproj_quantizations.trim()) {
+                    prefsObj.mmproj_quantizations = this.commonPreferences.mmproj_quantizations.trim();
+                }
                 
                 // Add custom preferences (can override common ones)
                 this.preferences.forEach(pref => {

From 9366052eb955f7f12446eda762f638a2eaee61d3 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 12 Nov 2025 19:24:49 +0100
Subject: [PATCH 16/16] Fix vlm usage in tokenizer mode with llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 backend/cpp/llama-cpp/grpc-server.cpp | 134 ++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 10 deletions(-)

diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index 790032d60316..17087fc060d0 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -590,17 +590,71 @@ class BackendServiceImpl final : public backend::Backend::Service {
                 // Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse
                 json body_json;
                 json messages_json = json::array();
+                
+                // Find the last user message index to attach images/audio to
+                int last_user_msg_idx = -1;
+                for (int i = request->messages_size() - 1; i >= 0; i--) {
+                    if (request->messages(i).role() == "user") {
+                        last_user_msg_idx = i;
+                        break;
+                    }
+                }
+                
                 for (int i = 0; i < request->messages_size(); i++) {
                     const auto& msg = request->messages(i);
                     json msg_json;
                     msg_json["role"] = msg.role();
                     
+                    bool is_last_user_msg = (i == last_user_msg_idx);
+                    bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
+                    
                     // Handle content - can be string, null, or array
                     // For multimodal content, we'll embed images/audio from separate fields
                     if (!msg.content().empty()) {
-                        msg_json["content"] = msg.content();
-                    } else if (request->images_size() > 0 || request->audios_size() > 0) {
-                        // If no content but has images/audio, create content array
+                        // Try to parse content as JSON (it may already be a content array); any valid JSON text is accepted, not only arrays
+                        json content_val;
+                        try {
+                            content_val = json::parse(msg.content());
+                        } catch (const json::parse_error&) {
+                            // Not JSON, treat as plain string
+                            content_val = msg.content();
+                        }
+                        
+                        // If content is a string and this is the last user message with images/audio, combine them
+                        if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
+                            json content_array = json::array();
+                            // Add text first
+                            content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
+                            // Add images
+                            if (request->images_size() > 0) {
+                                for (int j = 0; j < request->images_size(); j++) {
+                                    json image_chunk;
+                                    image_chunk["type"] = "image_url";
+                                    json image_url;
+                                    image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+                                    image_chunk["image_url"] = image_url;
+                                    content_array.push_back(image_chunk);
+                                }
+                            }
+                            // Add audios
+                            if (request->audios_size() > 0) {
+                                for (int j = 0; j < request->audios_size(); j++) {
+                                    json audio_chunk;
+                                    audio_chunk["type"] = "input_audio";
+                                    json input_audio;
+                                    input_audio["data"] = request->audios(j);
+                                    input_audio["format"] = "wav"; // default, could be made configurable
+                                    audio_chunk["input_audio"] = input_audio;
+                                    content_array.push_back(audio_chunk);
+                                }
+                            }
+                            msg_json["content"] = content_array;
+                        } else {
+                            // Use the parsed value unchanged (non-string JSON, not the last user message, or no images/audio to attach)
+                            msg_json["content"] = content_val;
+                        }
+                    } else if (is_last_user_msg && has_images_or_audio) {
+                        // If no content but this is the last user message with images/audio, create content array
                         json content_array = json::array();
                         if (request->images_size() > 0) {
                             for (int j = 0; j < request->images_size(); j++) {
@@ -718,6 +772,9 @@ class BackendServiceImpl final : public backend::Backend::Service {
                 // Create parser options with current chat_templates to ensure tmpls is not null
                 oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt;
                 parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates
+                // Update allow_image and allow_audio based on current mctx state
+                parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
+                parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
                 json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
                 
                 // Extract the prompt from parsed data
@@ -758,7 +815,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
 
             // If not using chat templates, extract files from image_data/audio_data fields
             // (If using chat templates, files were already extracted by oaicompat_chat_params_parse)
-            //if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
+            if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
                 const auto &images_data = data.find("image_data");
                 if (images_data != data.end() && images_data->is_array())
                 {
@@ -778,7 +835,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
                         files.push_back(decoded_data);
                     }
                 }
-           // }
+            }
 
             const bool has_mtmd = ctx_server.mctx != nullptr;
 
@@ -917,17 +974,71 @@ class BackendServiceImpl final : public backend::Backend::Service {
                 // Convert proto Messages to JSON format compatible with oaicompat_chat_params_parse
                 json body_json;
                 json messages_json = json::array();
+                
+                // Find the last user message index to attach images/audio to
+                int last_user_msg_idx = -1;
+                for (int i = request->messages_size() - 1; i >= 0; i--) {
+                    if (request->messages(i).role() == "user") {
+                        last_user_msg_idx = i;
+                        break;
+                    }
+                }
+                
                 for (int i = 0; i < request->messages_size(); i++) {
                     const auto& msg = request->messages(i);
                     json msg_json;
                     msg_json["role"] = msg.role();
                     
+                    bool is_last_user_msg = (i == last_user_msg_idx);
+                    bool has_images_or_audio = (request->images_size() > 0 || request->audios_size() > 0);
+                    
                     // Handle content - can be string, null, or array
                     // For multimodal content, we'll embed images/audio from separate fields
                     if (!msg.content().empty()) {
-                        msg_json["content"] = msg.content();
-                    } else if (request->images_size() > 0 || request->audios_size() > 0) {
-                        // If no content but has images/audio, create content array
+                        // Try to parse content as JSON (it may already be a content array); any valid JSON text is accepted, not only arrays
+                        json content_val;
+                        try {
+                            content_val = json::parse(msg.content());
+                        } catch (const json::parse_error&) {
+                            // Not JSON, treat as plain string
+                            content_val = msg.content();
+                        }
+                        
+                        // If content is a string and this is the last user message with images/audio, combine them
+                        if (content_val.is_string() && is_last_user_msg && has_images_or_audio) {
+                            json content_array = json::array();
+                            // Add text first
+                            content_array.push_back({{"type", "text"}, {"text", content_val.get<std::string>()}});
+                            // Add images
+                            if (request->images_size() > 0) {
+                                for (int j = 0; j < request->images_size(); j++) {
+                                    json image_chunk;
+                                    image_chunk["type"] = "image_url";
+                                    json image_url;
+                                    image_url["url"] = "data:image/jpeg;base64," + request->images(j);
+                                    image_chunk["image_url"] = image_url;
+                                    content_array.push_back(image_chunk);
+                                }
+                            }
+                            // Add audios
+                            if (request->audios_size() > 0) {
+                                for (int j = 0; j < request->audios_size(); j++) {
+                                    json audio_chunk;
+                                    audio_chunk["type"] = "input_audio";
+                                    json input_audio;
+                                    input_audio["data"] = request->audios(j);
+                                    input_audio["format"] = "wav"; // default, could be made configurable
+                                    audio_chunk["input_audio"] = input_audio;
+                                    content_array.push_back(audio_chunk);
+                                }
+                            }
+                            msg_json["content"] = content_array;
+                        } else {
+                            // Use the parsed value unchanged (non-string JSON, not the last user message, or no images/audio to attach)
+                            msg_json["content"] = content_val;
+                        }
+                    } else if (is_last_user_msg && has_images_or_audio) {
+                        // If no content but this is the last user message with images/audio, create content array
                         json content_array = json::array();
                         if (request->images_size() > 0) {
                             for (int j = 0; j < request->images_size(); j++) {
@@ -1048,6 +1159,9 @@ class BackendServiceImpl final : public backend::Backend::Service {
                 // Create parser options with current chat_templates to ensure tmpls is not null
                 oaicompat_parser_options parser_opt = ctx_server.oai_parser_opt;
                 parser_opt.tmpls = ctx_server.chat_templates.get(); // Ensure tmpls is set to current chat_templates
+                // Update allow_image and allow_audio based on current mctx state
+                parser_opt.allow_image = ctx_server.mctx ? mtmd_support_vision(ctx_server.mctx) : false;
+                parser_opt.allow_audio = ctx_server.mctx ? mtmd_support_audio(ctx_server.mctx) : false;
                 json parsed_data = oaicompat_chat_params_parse(body_json, parser_opt, files);
                 
                 // Extract the prompt from parsed data
@@ -1088,7 +1202,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
 
             // If not using chat templates, extract files from image_data/audio_data fields
             // (If using chat templates, files were already extracted by oaicompat_chat_params_parse)
-           // if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
+            if (!request->usetokenizertemplate() || request->messages_size() == 0 || ctx_server.chat_templates == nullptr) {
                 const auto &images_data = data.find("image_data");
                 if (images_data != data.end() && images_data->is_array())
                 {
@@ -1110,7 +1224,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
                         files.push_back(decoded_data);
                     }
                 }
-           // }
+            }
 
             // process files
             const bool has_mtmd = ctx_server.mctx != nullptr;