From 2765c008266939d3483b62d67e7d111c9c9706c3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:06:59 +0000 Subject: [PATCH 01/11] feat(galleryop): add TargetNodeID to ManagementOp for single-node installs Signed-off-by: Ettore Di Giacinto --- core/services/galleryop/backends_test.go | 20 ++++++++++++++++++++ core/services/galleryop/operation.go | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/core/services/galleryop/backends_test.go b/core/services/galleryop/backends_test.go index ef9a46c3d25a..60e95c6ee33b 100644 --- a/core/services/galleryop/backends_test.go +++ b/core/services/galleryop/backends_test.go @@ -196,4 +196,24 @@ var _ = Describe("ManagementOp with External Backend", func() { Expect(op.ExternalName).To(Equal("test-backend")) Expect(op.ExternalAlias).To(Equal("test-alias")) }) + + Context("TargetNodeID field", func() { + It("defaults to empty string", func() { + op := galleryop.ManagementOp[string, string]{ + ExternalURI: "oci://example.com/backend:latest", + } + Expect(op.TargetNodeID).To(BeEmpty()) + }) + + It("preserves TargetNodeID across a channel send", func() { + ch := make(chan galleryop.ManagementOp[string, string], 1) + ch <- galleryop.ManagementOp[string, string]{ + GalleryElementName: "llama-cpp", + TargetNodeID: "node-abc-123", + } + received := <-ch + Expect(received.TargetNodeID).To(Equal("node-abc-123")) + Expect(received.GalleryElementName).To(Equal("llama-cpp")) + }) + }) }) diff --git a/core/services/galleryop/operation.go b/core/services/galleryop/operation.go index 1c766eadefed..1074138013cc 100644 --- a/core/services/galleryop/operation.go +++ b/core/services/galleryop/operation.go @@ -30,6 +30,12 @@ type ManagementOp[T any, E any] struct { ExternalName string // Custom name for the backend ExternalAlias string // Custom alias for the backend + // TargetNodeID scopes a backend install/upgrade to a single worker node. + // Empty means fan out to every healthy backend node (the previous behavior). 
+ // Set by InstallBackendOnNodeEndpoint so an admin can install a hardware-specific + // build on one node without touching the rest of the cluster. + TargetNodeID string + // Upgrade is true if this is an upgrade operation (not a fresh install) Upgrade bool } From e820e146951cd2de1f55a546f37ec6619d882a97 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:10:22 +0000 Subject: [PATCH 02/11] feat(galleryop): add NodeScopedKey helpers for per-node opcache rows Signed-off-by: Ettore Di Giacinto --- core/services/galleryop/backends_test.go | 31 +++++++++++++++++++++ core/services/galleryop/operation.go | 34 ++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/core/services/galleryop/backends_test.go b/core/services/galleryop/backends_test.go index 60e95c6ee33b..d7a304b099b2 100644 --- a/core/services/galleryop/backends_test.go +++ b/core/services/galleryop/backends_test.go @@ -216,4 +216,35 @@ var _ = Describe("ManagementOp with External Backend", func() { Expect(received.GalleryElementName).To(Equal("llama-cpp")) }) }) + + Describe("NodeScopedKey", func() { + It("builds a unique key per (nodeID, backend) pair", func() { + Expect(galleryop.NodeScopedKey("node-a", "llama-cpp")).To(Equal("node:node-a:llama-cpp")) + Expect(galleryop.NodeScopedKey("node-b", "llama-cpp")).To(Equal("node:node-b:llama-cpp")) + Expect(galleryop.NodeScopedKey("node-a", "vllm")).To(Equal("node:node-a:vllm")) + }) + + It("handles backend names containing colons", func() { + // Gallery IDs sometimes look like "official@llama-cpp"; nodeIDs are UUIDs + // without colons, but the backend slug may contain anything. Splitting on + // the first colon after the prefix MUST yield the full backend back. 
+ key := galleryop.NodeScopedKey("node-1", "official@llama-cpp:v2") + node, backend, ok := galleryop.ParseNodeScopedKey(key) + Expect(ok).To(BeTrue()) + Expect(node).To(Equal("node-1")) + Expect(backend).To(Equal("official@llama-cpp:v2")) + }) + + It("rejects keys without the node prefix", func() { + _, _, ok := galleryop.ParseNodeScopedKey("llama-cpp") + Expect(ok).To(BeFalse()) + _, _, ok = galleryop.ParseNodeScopedKey("official@llama-cpp") + Expect(ok).To(BeFalse()) + }) + + It("rejects malformed node-prefixed keys", func() { + _, _, ok := galleryop.ParseNodeScopedKey("node:only-one-segment") + Expect(ok).To(BeFalse()) + }) + }) }) diff --git a/core/services/galleryop/operation.go b/core/services/galleryop/operation.go index 1074138013cc..1fc1432246ae 100644 --- a/core/services/galleryop/operation.go +++ b/core/services/galleryop/operation.go @@ -121,3 +121,37 @@ func (m *OpCache) GetStatus() (map[string]string, map[string]string) { return processingModelsData, taskTypes } + +// NodeScopedKeyPrefix is the opcache key prefix used by InstallBackendOnNodeEndpoint +// so per-node installs do not collide on the bare backend name. Format: +// "node:<nodeID>:<backend>". Read by /api/operations to extract nodeID for the UI. +const NodeScopedKeyPrefix = "node:" + +// NodeScopedKey returns the opcache key for a node-scoped backend operation. +// The prefix lets ParseNodeScopedKey detach the nodeID back out so the +// operations endpoint can surface it without storing nodeID separately. +func NodeScopedKey(nodeID, backend string) string { + return NodeScopedKeyPrefix + nodeID + ":" + backend +} + +// ParseNodeScopedKey extracts (nodeID, backend) from a key built by NodeScopedKey. +// Returns ok=false for keys that lack the prefix or are missing the backend +// segment. Backend names containing colons are preserved because we split on +// the first colon after the prefix only. 
+func ParseNodeScopedKey(key string) (nodeID, backend string, ok bool) { + if len(key) <= len(NodeScopedKeyPrefix) || key[:len(NodeScopedKeyPrefix)] != NodeScopedKeyPrefix { + return "", "", false + } + rest := key[len(NodeScopedKeyPrefix):] + idx := -1 + for i := 0; i < len(rest); i++ { + if rest[i] == ':' { + idx = i + break + } + } + if idx < 0 || idx == len(rest)-1 { + return "", "", false + } + return rest[:idx], rest[idx+1:], true +} From c68d087828938a9d7908abc7d6ac083bcce47a7e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:14:10 +0000 Subject: [PATCH 03/11] refactor(galleryop): use strings.Cut for NodeScopedKey parsing, reject empty nodeID Signed-off-by: Ettore Di Giacinto --- core/services/galleryop/backends_test.go | 5 +++++ core/services/galleryop/operation.go | 23 +++++++++-------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/core/services/galleryop/backends_test.go b/core/services/galleryop/backends_test.go index d7a304b099b2..c9e0fbc1c6d2 100644 --- a/core/services/galleryop/backends_test.go +++ b/core/services/galleryop/backends_test.go @@ -246,5 +246,10 @@ var _ = Describe("ManagementOp with External Backend", func() { _, _, ok := galleryop.ParseNodeScopedKey("node:only-one-segment") Expect(ok).To(BeFalse()) }) + + It("rejects keys with an empty nodeID segment", func() { + _, _, ok := galleryop.ParseNodeScopedKey("node::llama-cpp") + Expect(ok).To(BeFalse()) + }) }) }) diff --git a/core/services/galleryop/operation.go b/core/services/galleryop/operation.go index 1fc1432246ae..c022a8976fde 100644 --- a/core/services/galleryop/operation.go +++ b/core/services/galleryop/operation.go @@ -2,6 +2,7 @@ package galleryop import ( "context" + "strings" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/pkg/xsync" @@ -135,23 +136,17 @@ func NodeScopedKey(nodeID, backend string) string { } // ParseNodeScopedKey extracts (nodeID, backend) from a key built by NodeScopedKey. 
-// Returns ok=false for keys that lack the prefix or are missing the backend -// segment. Backend names containing colons are preserved because we split on -// the first colon after the prefix only. +// Returns ok=false for keys that lack the prefix or are missing the nodeID or +// backend segment. Backend names containing colons are preserved because we +// split on the first colon after the prefix only. func ParseNodeScopedKey(key string) (nodeID, backend string, ok bool) { - if len(key) <= len(NodeScopedKeyPrefix) || key[:len(NodeScopedKeyPrefix)] != NodeScopedKeyPrefix { + rest, hasPrefix := strings.CutPrefix(key, NodeScopedKeyPrefix) + if !hasPrefix { return "", "", false } - rest := key[len(NodeScopedKeyPrefix):] - idx := -1 - for i := 0; i < len(rest); i++ { - if rest[i] == ':' { - idx = i - break - } - } - if idx < 0 || idx == len(rest)-1 { + nodeID, backend, ok = strings.Cut(rest, ":") + if !ok || nodeID == "" || backend == "" { return "", "", false } - return rest[:idx], rest[idx+1:], true + return nodeID, backend, true } From cd00ddc2513ad9d6751d772999beb27bffe4f8f7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:18:34 +0000 Subject: [PATCH 04/11] feat(nodes): scope DistributedBackendManager.InstallBackend to single node via TargetNodeID Signed-off-by: Ettore Di Giacinto --- core/services/nodes/managers_distributed.go | 14 ++++++- .../nodes/managers_distributed_test.go | 41 +++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/core/services/nodes/managers_distributed.go b/core/services/nodes/managers_distributed.go index e5c99d9b72b7..b5a92fcbb4f8 100644 --- a/core/services/nodes/managers_distributed.go +++ b/core/services/nodes/managers_distributed.go @@ -331,13 +331,23 @@ func (d *DistributedBackendManager) ListBackends() (gallery.SystemBackends, erro // non-healthy nodes get retried when they come back instead of being silently // skipped. 
Reply success from the NATS round-trip deletes the queue row; // reply.Success==false is treated as an error so the row stays for retry. +// +// When op.TargetNodeID is set, only that node is visited - the same allowlist +// path UpgradeBackend uses. Empty TargetNodeID preserves the original fan-out +// behavior so the periodic reconciler and /api/backends/install/:id keep +// working unchanged. func (d *DistributedBackendManager) InstallBackend(ctx context.Context, op *galleryop.ManagementOp[gallery.GalleryBackend, any], progressCb galleryop.ProgressCallback) error { galleriesJSON, _ := json.Marshal(op.Galleries) backendName := op.GalleryElementName - result, err := d.enqueueAndDrainBackendOp(ctx, OpBackendInstall, backendName, galleriesJSON, nil, func(node BackendNode) error { + var targetNodeIDs map[string]bool + if op.TargetNodeID != "" { + targetNodeIDs = map[string]bool{op.TargetNodeID: true} + } + + result, err := d.enqueueAndDrainBackendOp(ctx, OpBackendInstall, backendName, galleriesJSON, targetNodeIDs, func(node BackendNode) error { // Admin-driven backend install: not tied to a specific replica slot. - // Pass replica 0 — the worker's processKey is "backend#0" when no + // Pass replica 0 - the worker's processKey is "backend#0" when no // modelID is supplied, matching pre-PR4 behavior. 
reply, err := d.adapter.InstallBackend(node.ID, backendName, "", string(galleriesJSON), op.ExternalURI, op.ExternalName, op.ExternalAlias, 0) if err != nil { diff --git a/core/services/nodes/managers_distributed_test.go b/core/services/nodes/managers_distributed_test.go index 79ae8c7b980f..793ed5659ea1 100644 --- a/core/services/nodes/managers_distributed_test.go +++ b/core/services/nodes/managers_distributed_test.go @@ -311,6 +311,47 @@ var _ = Describe("DistributedBackendManager", func() { Expect(mgr.InstallBackend(ctx, op("vllm-development"), nil)).To(Succeed()) }) }) + + Context("when op.TargetNodeID is set to a healthy node", func() { + It("installs only on that node, leaving the others untouched", func() { + target := registerHealthyBackend("worker-target", "10.0.0.1:50051") + other := registerHealthyBackend("worker-other", "10.0.0.2:50051") + + mc.scriptReply(messaging.SubjectNodeBackendInstall(target.ID), + messaging.BackendInstallReply{Success: true, Address: "10.0.0.1:50100"}) + // No reply scripted for `other`: if InstallBackend fans out + // to it, the fakeNoRespondersErr default would surface and + // the test would fail. 
+ + targetedOp := &galleryop.ManagementOp[gallery.GalleryBackend, any]{ + GalleryElementName: "llama-cpp", + TargetNodeID: target.ID, + } + Expect(mgr.InstallBackend(ctx, targetedOp, nil)).To(Succeed()) + + mc.mu.Lock() + defer mc.mu.Unlock() + Expect(mc.calls).To(HaveLen(1)) + Expect(mc.calls[0].Subject).To(Equal(messaging.SubjectNodeBackendInstall(target.ID))) + Expect(mc.calls[0].Subject).ToNot(Equal(messaging.SubjectNodeBackendInstall(other.ID))) + }) + }) + + Context("when op.TargetNodeID is set to a node that does not exist", func() { + It("returns nil without sending any NATS request", func() { + registerHealthyBackend("worker-a", "10.0.0.1:50051") + + ghostOp := &galleryop.ManagementOp[gallery.GalleryBackend, any]{ + GalleryElementName: "llama-cpp", + TargetNodeID: "this-id-does-not-exist", + } + Expect(mgr.InstallBackend(ctx, ghostOp, nil)).To(Succeed()) + + mc.mu.Lock() + defer mc.mu.Unlock() + Expect(mc.calls).To(BeEmpty()) + }) + }) }) Describe("UpgradeBackend", func() { From 746ea0a7cc1e1a32853a17fc1873c84732924313 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:26:58 +0000 Subject: [PATCH 05/11] feat(http): make /api/nodes/:id/backends/install async via gallery service job queue The handler previously called unloader.InstallBackend synchronously and blocked the browser for up to 3 minutes waiting on the NATS reply. It now enqueues a TargetNodeID-scoped ManagementOp on BackendGalleryChannel and returns HTTP 202 + jobID immediately, matching /api/backends/install/:id. The opcache key is built via NodeScopedKey(nodeID, backend) so concurrent installs of the same backend across different nodes do not stomp each other. galleryService/opcache/appConfig are threaded through RegisterNodeAdminRoutes for this. 
Assisted-by: Claude:opus-4-7 [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- core/http/app.go | 2 +- core/http/endpoints/localai/nodes.go | 87 ++++++++++++--- .../localai/nodes_install_async_test.go | 103 ++++++++++++++++++ core/http/routes/nodes.go | 11 +- 4 files changed, 182 insertions(+), 21 deletions(-) create mode 100644 core/http/endpoints/localai/nodes_install_async_test.go diff --git a/core/http/app.go b/core/http/app.go index 464e506dbe54..0f7bbbbcba30 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -407,7 +407,7 @@ func API(application *application.Application) (*echo.Echo, error) { } } routes.RegisterNodeSelfServiceRoutes(e, registry, distCfg.RegistrationToken, distCfg.AutoApproveNodes, application.AuthDB(), application.ApplicationConfig().Auth.APIKeyHMACSecret) - routes.RegisterNodeAdminRoutes(e, registry, remoteUnloader, adminMiddleware, application.AuthDB(), application.ApplicationConfig().Auth.APIKeyHMACSecret, application.ApplicationConfig().Distributed.RegistrationToken) + routes.RegisterNodeAdminRoutes(e, registry, remoteUnloader, application.GalleryService(), opcache, application.ApplicationConfig(), adminMiddleware, application.AuthDB(), application.ApplicationConfig().Auth.APIKeyHMACSecret, application.ApplicationConfig().Distributed.RegistrationToken) // Distributed SSE routes (job progress + agent events via NATS) if d := application.Distributed(); d != nil { diff --git a/core/http/endpoints/localai/nodes.go b/core/http/endpoints/localai/nodes.go index 9b622acf51fc..8479686eb1f5 100644 --- a/core/http/endpoints/localai/nodes.go +++ b/core/http/endpoints/localai/nodes.go @@ -16,8 +16,11 @@ import ( "github.com/google/uuid" "github.com/gorilla/websocket" "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/http/auth" "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/core/services/galleryop" 
"github.com/mudler/LocalAI/core/services/nodes" "github.com/mudler/xlog" "gorm.io/gorm" @@ -381,14 +384,24 @@ func ResumeNodeEndpoint(registry *nodes.NodeRegistry) echo.HandlerFunc { } } -// InstallBackendOnNodeEndpoint triggers backend installation on a worker node via NATS. +// InstallBackendOnNodeEndpoint triggers backend installation on a worker node. +// Async: enqueues a ManagementOp on the gallery service channel and returns a +// jobID immediately. The gallery service worker goroutine drives the actual +// install via DistributedBackendManager.InstallBackend, which honors the op's +// TargetNodeID to scope the fan-out to one node. The UI polls /api/backends/job/:uid +// for progress, mirroring /api/backends/install/:id. +// // Backend can be either a gallery ID (resolved against BackendGalleries) or a -// direct URI install (URI + Name + optional Alias) — same shape as the +// direct URI install (URI + Name + optional Alias) - same shape as the // standalone /api/backends/install-external path, just scoped to one node. -func InstallBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.HandlerFunc { +// +// The legacy unloader argument is retained for signature symmetry with +// DeleteBackendOnNodeEndpoint / ListBackendsOnNodeEndpoint but is no longer +// used here - the async path goes through galleryService. 
+func InstallBackendOnNodeEndpoint(_ nodes.NodeCommandSender, galleryService *galleryop.GalleryService, opcache *galleryop.OpCache, appConfig *config.ApplicationConfig) echo.HandlerFunc { return func(c echo.Context) error { - if unloader == nil { - return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "NATS not configured")) + if galleryService == nil { + return c.JSON(http.StatusServiceUnavailable, nodeError(http.StatusServiceUnavailable, "gallery service not configured")) } nodeID := c.Param("id") var req struct { @@ -401,25 +414,63 @@ func InstallBackendOnNodeEndpoint(unloader nodes.NodeCommandSender) echo.Handler if err := c.Bind(&req); err != nil { return c.JSON(http.StatusBadRequest, nodeError(http.StatusBadRequest, "invalid request body")) } - // Either a gallery backend name or a direct URI must be supplied. if req.Backend == "" && req.URI == "" { return c.JSON(http.StatusBadRequest, nodeError(http.StatusBadRequest, "backend name or uri required")) } - // Admin-driven backend install: not tied to a specific replica slot - // (no model is being loaded). Pass replica 0 to match the worker's - // admin process-key convention (`backend#0`). The worker's fast path - // takes over if the backend is already running — upgrades go through - // the dedicated /api/backends/upgrade path on backend.upgrade. 
- reply, err := unloader.InstallBackend(nodeID, req.Backend, "", req.BackendGalleries, req.URI, req.Name, req.Alias, 0) + + jobUUID, err := uuid.NewUUID() if err != nil { - xlog.Error("Failed to install backend on node", "node", nodeID, "backend", req.Backend, "uri", req.URI, "error", err) - return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "failed to install backend on node")) + return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "failed to generate job id")) + } + jobID := jobUUID.String() + + // Cache key: for gallery installs, use the backend slug; for URI + // installs prefer the provided Name (falling back to URI). All keys + // are node-scoped so concurrent installs of the same backend on + // different nodes do not stomp each other in opcache. + backendKey := req.Backend + if backendKey == "" { + backendKey = req.Name + if backendKey == "" { + backendKey = req.URI + } } - if !reply.Success { - xlog.Error("Backend install failed on node", "node", nodeID, "backend", req.Backend, "uri", req.URI, "error", reply.Error) - return c.JSON(http.StatusInternalServerError, nodeError(http.StatusInternalServerError, "backend installation failed")) + cacheKey := galleryop.NodeScopedKey(nodeID, backendKey) + opcache.SetBackend(cacheKey, jobID) + + // Optional caller-supplied galleries override. Mirrors the standalone + // install path so an admin can point at a private gallery. 
+ galleries := appConfig.BackendGalleries + if req.BackendGalleries != "" { + var custom []config.Gallery + if err := json.Unmarshal([]byte(req.BackendGalleries), &custom); err == nil && len(custom) > 0 { + galleries = custom + } + } + + ctx, cancelFunc := context.WithCancel(context.Background()) + op := galleryop.ManagementOp[gallery.GalleryBackend, any]{ + ID: jobID, + GalleryElementName: req.Backend, + Galleries: galleries, + TargetNodeID: nodeID, + ExternalURI: req.URI, + ExternalName: req.Name, + ExternalAlias: req.Alias, + Context: ctx, + CancelFunc: cancelFunc, } - return c.JSON(http.StatusOK, map[string]string{"message": "backend installed"}) + galleryService.StoreCancellation(jobID, cancelFunc) + go func() { + galleryService.BackendGalleryChannel <- op + }() + + xlog.Info("Node-scoped backend install dispatched", "node", nodeID, "backend", req.Backend, "uri", req.URI, "jobID", jobID) + return c.JSON(http.StatusAccepted, map[string]string{ + "jobID": jobID, + "statusURL": "/api/backends/job/" + jobID, + "message": "backend installation started", + }) } } diff --git a/core/http/endpoints/localai/nodes_install_async_test.go b/core/http/endpoints/localai/nodes_install_async_test.go new file mode 100644 index 000000000000..e90259ebfee0 --- /dev/null +++ b/core/http/endpoints/localai/nodes_install_async_test.go @@ -0,0 +1,103 @@ +package localai_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + + "github.com/labstack/echo/v4" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/endpoints/localai" + "github.com/mudler/LocalAI/core/services/galleryop" +) + +// InstallBackendOnNodeEndpoint became async to stop blocking the browser on +// the 3-minute NATS reply timeout. 
These specs lock in the new contract: +// HTTP 202 with a jobID, a ManagementOp enqueued on the gallery channel, and +// an opcache entry keyed by NodeScopedKey so concurrent installs of the same +// backend on different nodes do not stomp each other. +var _ = Describe("InstallBackendOnNodeEndpoint async behavior", func() { + var ( + e *echo.Echo + galleryService *galleryop.GalleryService + opcache *galleryop.OpCache + appCfg *config.ApplicationConfig + dispatched chan galleryop.ManagementOp[gallery.GalleryBackend, any] + ) + + BeforeEach(func() { + e = echo.New() + appCfg = &config.ApplicationConfig{ + BackendGalleries: []config.Gallery{{Name: "test-gallery", URL: "http://example.com"}}, + } + galleryService = galleryop.NewGalleryService(appCfg, nil) + opcache = galleryop.NewOpCache(galleryService) + // Drain the gallery channel into a buffered side channel so the + // handler's `go func() { ch <- op }()` send does not block waiting + // for the real worker (which is not running in this unit test). 
+ dispatched = make(chan galleryop.ManagementOp[gallery.GalleryBackend, any], 4) + go func() { + for op := range galleryService.BackendGalleryChannel { + dispatched <- op + } + }() + }) + + It("returns 202 with a jobID and dispatches a TargetNodeID-scoped op", func() { + body := `{"backend": "llama-cpp"}` + req := httptest.NewRequest(http.MethodPost, "/api/nodes/node-xyz/backends/install", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + c := e.NewContext(req, rec) + c.SetParamNames("id") + c.SetParamValues("node-xyz") + + handler := localai.InstallBackendOnNodeEndpoint(nil, galleryService, opcache, appCfg) + Expect(handler(c)).To(Succeed()) + Expect(rec.Code).To(Equal(http.StatusAccepted)) + + var resp map[string]any + Expect(json.Unmarshal(rec.Body.Bytes(), &resp)).To(Succeed()) + Expect(resp["jobID"]).To(BeAssignableToTypeOf("")) + Expect(resp["jobID"].(string)).ToNot(BeEmpty()) + Expect(resp["message"]).To(Equal("backend installation started")) + + Eventually(dispatched, "2s").Should(Receive()) + Expect(opcache.Exists(galleryop.NodeScopedKey("node-xyz", "llama-cpp"))).To(BeTrue()) + Expect(opcache.IsBackendOp(galleryop.NodeScopedKey("node-xyz", "llama-cpp"))).To(BeTrue()) + }) + + It("returns 400 when neither backend nor uri is supplied", func() { + req := httptest.NewRequest(http.MethodPost, "/api/nodes/node-xyz/backends/install", bytes.NewBufferString(`{}`)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + c := e.NewContext(req, rec) + c.SetParamNames("id") + c.SetParamValues("node-xyz") + + handler := localai.InstallBackendOnNodeEndpoint(nil, galleryService, opcache, appCfg) + Expect(handler(c)).To(Succeed()) + Expect(rec.Code).To(Equal(http.StatusBadRequest)) + }) + + It("accepts a direct URI install and uses the name as the cache key", func() { + body := `{"uri": "oci://example.com/custom-backend:v1", "name": "custom"}` + req := 
httptest.NewRequest(http.MethodPost, "/api/nodes/node-xyz/backends/install", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + c := e.NewContext(req, rec) + c.SetParamNames("id") + c.SetParamValues("node-xyz") + + handler := localai.InstallBackendOnNodeEndpoint(nil, galleryService, opcache, appCfg) + Expect(handler(c)).To(Succeed()) + Expect(rec.Code).To(Equal(http.StatusAccepted)) + + Expect(opcache.Exists(galleryop.NodeScopedKey("node-xyz", "custom"))).To(BeTrue()) + }) +}) diff --git a/core/http/routes/nodes.go b/core/http/routes/nodes.go index 7ea0c91932d8..bbf574c6b111 100644 --- a/core/http/routes/nodes.go +++ b/core/http/routes/nodes.go @@ -6,7 +6,9 @@ import ( "strings" "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/http/endpoints/localai" + "github.com/mudler/LocalAI/core/services/galleryop" "github.com/mudler/LocalAI/core/services/nodes" "gorm.io/gorm" ) @@ -53,7 +55,12 @@ func RegisterNodeSelfServiceRoutes(e *echo.Echo, registry *nodes.NodeRegistry, r // RegisterNodeAdminRoutes registers /api/nodes/ endpoints used by admins // (list, get, get models, drain, delete, approve, backend management). Protected by admin middleware. -func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloader nodes.NodeCommandSender, adminMw echo.MiddlewareFunc, authDB *gorm.DB, hmacSecret string, registrationToken string) { +// +// galleryService/opcache/appConfig are threaded in for the async node-scoped +// backend install path (POST /:id/backends/install). That handler enqueues a +// ManagementOp on the gallery channel rather than blocking on a NATS reply, so +// the browser gets HTTP 202 + jobID immediately instead of waiting up to 3 minutes. 
+func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloader nodes.NodeCommandSender, galleryService *galleryop.GalleryService, opcache *galleryop.OpCache, appConfig *config.ApplicationConfig, adminMw echo.MiddlewareFunc, authDB *gorm.DB, hmacSecret string, registrationToken string) { if registry == nil { return } @@ -78,7 +85,7 @@ func RegisterNodeAdminRoutes(e *echo.Echo, registry *nodes.NodeRegistry, unloade // Backend management on workers admin.GET("/:id/backends", localai.ListBackendsOnNodeEndpoint(unloader)) - admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader)) + admin.POST("/:id/backends/install", localai.InstallBackendOnNodeEndpoint(unloader, galleryService, opcache, appConfig)) admin.POST("/:id/backends/delete", localai.DeleteBackendOnNodeEndpoint(unloader)) // Model management on workers From 7629418a7efb8ab62c17b38811caf6457c8f9174 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:33:16 +0000 Subject: [PATCH 06/11] refactor(http): log malformed backend_galleries override and stop test drain goroutine Assisted-by: Claude:opus-4-7 [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/localai/nodes.go | 4 +++- .../localai/nodes_install_async_test.go | 24 +++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/core/http/endpoints/localai/nodes.go b/core/http/endpoints/localai/nodes.go index 8479686eb1f5..fb69dcd8de8f 100644 --- a/core/http/endpoints/localai/nodes.go +++ b/core/http/endpoints/localai/nodes.go @@ -443,7 +443,9 @@ func InstallBackendOnNodeEndpoint(_ nodes.NodeCommandSender, galleryService *gal galleries := appConfig.BackendGalleries if req.BackendGalleries != "" { var custom []config.Gallery - if err := json.Unmarshal([]byte(req.BackendGalleries), &custom); err == nil && len(custom) > 0 { + if err := json.Unmarshal([]byte(req.BackendGalleries), &custom); err != nil { + xlog.Warn("Ignoring malformed backend_galleries 
override; falling back to configured galleries", "error", err, "nodeID", nodeID) + } else if len(custom) > 0 { galleries = custom } } diff --git a/core/http/endpoints/localai/nodes_install_async_test.go b/core/http/endpoints/localai/nodes_install_async_test.go index e90259ebfee0..c3ae9745a29b 100644 --- a/core/http/endpoints/localai/nodes_install_async_test.go +++ b/core/http/endpoints/localai/nodes_install_async_test.go @@ -28,6 +28,8 @@ var _ = Describe("InstallBackendOnNodeEndpoint async behavior", func() { opcache *galleryop.OpCache appCfg *config.ApplicationConfig dispatched chan galleryop.ManagementOp[gallery.GalleryBackend, any] + done chan struct{} + drainExited chan struct{} ) BeforeEach(func() { @@ -41,13 +43,31 @@ var _ = Describe("InstallBackendOnNodeEndpoint async behavior", func() { // handler's `go func() { ch <- op }()` send does not block waiting // for the real worker (which is not running in this unit test). dispatched = make(chan galleryop.ManagementOp[gallery.GalleryBackend, any], 4) + done = make(chan struct{}) + drainExited = make(chan struct{}) go func() { - for op := range galleryService.BackendGalleryChannel { - dispatched <- op + defer close(drainExited) + for { + select { + case op := <-galleryService.BackendGalleryChannel: + dispatched <- op + case <-done: + return + } } }() }) + AfterEach(func() { + // Signal the drain goroutine to exit. We do NOT close + // BackendGalleryChannel: the handler's dispatch goroutine may still + // be pending (specs that don't Eventually-Receive), and a send on a + // closed channel panics. Signalling via `done` lets the drain + // goroutine return without touching the gallery channel. 
+ close(done) + Eventually(drainExited, "2s").Should(BeClosed()) + }) + It("returns 202 with a jobID and dispatches a TargetNodeID-scoped op", func() { body := `{"backend": "llama-cpp"}` req := httptest.NewRequest(http.MethodPost, "/api/nodes/node-xyz/backends/install", bytes.NewBufferString(body)) From 36f32c6b76f548d34ede89619ca1d239340b7472 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:37:30 +0000 Subject: [PATCH 07/11] feat(api): expose nodeID for node-scoped backend ops in /api/operations Node-scoped backend installs land in opcache under "node:<nodeID>:<backend>" keys. Without splitting that prefix back out, the operations panel renders the full key as the display name and has no structured way to label which worker an install is targeting. Detect the prefix, surface nodeID as its own response field, and reduce the display name back to the bare backend slug. Bare (non-scoped) ops are left untouched so legacy installs do not gain a misleading empty nodeID. Assisted-by: Claude:opus-4-7 [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- core/http/routes/ui_api.go | 17 ++++ core/http/routes/ui_api_operations_test.go | 98 ++++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 core/http/routes/ui_api_operations_test.go diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index d75f139476d0..9b3067459265 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -214,6 +214,17 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model } } + // Node-scoped backend ops (from /api/nodes/:id/backends/install) + // carry the nodeID inside the opcache key as "node:<nodeID>:<backend>". + // Pull it back out so the operations panel can label which node the + // install is targeting, and so the display name is just the backend + // slug instead of the full prefixed key. 
+ scopedNodeID := "" + if nodeID, backend, ok := galleryop.ParseNodeScopedKey(galleryID); ok { + scopedNodeID = nodeID + galleryID = backend + } + // Extract display name (remove repo prefix if exists) displayName := galleryID if strings.Contains(galleryID, "@") { @@ -237,6 +248,12 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model "cancellable": isCancellable, "message": message, } + // Only attach nodeID when this op was node-scoped: an empty string + // would mislead the UI into rendering a node attribution that never + // existed in the first place. + if scopedNodeID != "" { + opData["nodeID"] = scopedNodeID + } if status != nil && status.Error != nil { opData["error"] = status.Error.Error() } diff --git a/core/http/routes/ui_api_operations_test.go b/core/http/routes/ui_api_operations_test.go new file mode 100644 index 000000000000..2ed4aad9c30c --- /dev/null +++ b/core/http/routes/ui_api_operations_test.go @@ -0,0 +1,98 @@ +package routes_test + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + + "github.com/labstack/echo/v4" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/application" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/http/routes" + "github.com/mudler/LocalAI/core/services/galleryop" +) + +// These specs guard the contract between the opcache (which stores +// node-scoped backend installs under a "node:<nodeID>:<backend>" key) and the +// /api/operations response surface the React UI polls. Without nodeID +// extraction the panel would show the raw prefixed key and have no way to +// label which worker an install is targeting. +var _ = Describe("/api/operations with node-scoped backend ops", func() { + // We pass a zero-value *application.Application because the handler's + // distributed-services branch guards on a nil check on the returned + // *DistributedServices, which is nil for a fresh Application{}. 
+ noopMw := func(next echo.HandlerFunc) echo.HandlerFunc { return next } + + It("emits nodeID and the un-prefixed backend name for keys built by NodeScopedKey", func() { + appCfg := &config.ApplicationConfig{} + galleryService := galleryop.NewGalleryService(appCfg, nil) + opcache := galleryop.NewOpCache(galleryService) + + key := galleryop.NodeScopedKey("worker-7", "llama-cpp") + opcache.SetBackend(key, "job-uuid-123") + + e := echo.New() + routes.RegisterUIAPIRoutes(e, nil, nil, appCfg, galleryService, opcache, &application.Application{}, noopMw) + + req := httptest.NewRequest(http.MethodGet, "/api/operations", nil) + rec := httptest.NewRecorder() + e.ServeHTTP(rec, req) + + Expect(rec.Code).To(Equal(http.StatusOK)) + + // The handler wraps operations in {"operations": [...]}. + var envelope struct { + Operations []map[string]any `json:"operations"` + } + Expect(json.Unmarshal(rec.Body.Bytes(), &envelope)).To(Succeed()) + + var found map[string]any + for _, op := range envelope.Operations { + if op["jobID"] == "job-uuid-123" { + found = op + break + } + } + Expect(found).ToNot(BeNil(), "node-scoped op should appear in /api/operations") + Expect(found["nodeID"]).To(Equal("worker-7")) + Expect(found["name"]).To(Equal("llama-cpp")) + Expect(found["isBackend"]).To(Equal(true)) + }) + + It("does not emit nodeID for non-node-scoped backend ops", func() { + appCfg := &config.ApplicationConfig{} + galleryService := galleryop.NewGalleryService(appCfg, nil) + opcache := galleryop.NewOpCache(galleryService) + + // Legacy/global install path: bare backend name as the opcache key. 
+ opcache.SetBackend("llama-cpp", "job-uuid-456") + + e := echo.New() + routes.RegisterUIAPIRoutes(e, nil, nil, appCfg, galleryService, opcache, &application.Application{}, noopMw) + + req := httptest.NewRequest(http.MethodGet, "/api/operations", nil) + rec := httptest.NewRecorder() + e.ServeHTTP(rec, req) + + Expect(rec.Code).To(Equal(http.StatusOK)) + var envelope struct { + Operations []map[string]any `json:"operations"` + } + Expect(json.Unmarshal(rec.Body.Bytes(), &envelope)).To(Succeed()) + + var found map[string]any + for _, op := range envelope.Operations { + if op["jobID"] == "job-uuid-456" { + found = op + break + } + } + Expect(found).ToNot(BeNil()) + // Critical: bare ops must NOT gain a misleading empty nodeID field. + Expect(found).ToNot(HaveKey("nodeID"), "non-node-scoped ops must NOT carry a nodeID field") + Expect(found["name"]).To(Equal("llama-cpp")) + }) +}) From 458e56aaeb8eab6438122e7f85eb2799a6c42dd3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:43:40 +0000 Subject: [PATCH 08/11] feat(react-ui): poll job status for node-targeted backend installs Assisted-by: Claude:opus-4-7 [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- .../src/components/NodeInstallPicker.jsx | 131 +++++++++++++++--- 1 file changed, 108 insertions(+), 23 deletions(-) diff --git a/core/http/react-ui/src/components/NodeInstallPicker.jsx b/core/http/react-ui/src/components/NodeInstallPicker.jsx index 50fccc2b2b95..b1b2920b9825 100644 --- a/core/http/react-ui/src/components/NodeInstallPicker.jsx +++ b/core/http/react-ui/src/components/NodeInstallPicker.jsx @@ -1,7 +1,7 @@ import { useState, useMemo, useEffect, useRef } from 'react' import Modal from './Modal' import SearchableSelect from './SearchableSelect' -import { nodesApi } from '../utils/api' +import { nodesApi, backendsApi } from '../utils/api' // NodeInstallPicker is the single multi-node install surface used both from // the Backends gallery split-button and from the "Install on more 
nodes" `+` @@ -240,6 +240,37 @@ export default function NodeInstallPicker({ } const clearSelection = () => setSelected(new Set()) + // pollJob resolves with { done: true, error?: string } once a single job + // completes, fails, or is cancelled. Bounded by a hard wall-clock cap so a + // stuck worker eventually surfaces in the UI as "Failed" instead of + // spinning forever. + const pollJob = (jobID) => new Promise((resolve) => { + const POLL_INTERVAL_MS = 1500 + const HARD_CAP_MS = 6 * 60 * 1000 // 6 min - generous for a fresh worker download + const startedAt = Date.now() + + const tick = async () => { + try { + const status = await backendsApi.getJob(jobID) + if (status?.completed) { resolve({ done: true }); return } + if (status?.error) { resolve({ done: true, error: status.error }); return } + if (status?.processed && !status?.completed) { + resolve({ done: true, error: status.error || (status.cancelled ? 'cancelled' : '') }) + return + } + } catch (err) { + resolve({ done: true, error: err?.message || 'polling failed' }) + return + } + if (Date.now() - startedAt > HARD_CAP_MS) { + resolve({ done: true, error: 'timed out waiting for install to finish' }) + return + } + setTimeout(tick, POLL_INTERVAL_MS) + } + tick() + }) + const submit = async () => { if (selected.size === 0 || submitting) return if (counts.overrides > 0 && !showMismatchConfirm) { @@ -255,38 +286,66 @@ export default function NodeInstallPicker({ return next }) - const results = await Promise.allSettled(ids.map(id => + // Phase 1: dispatch all installs in parallel. Each POST returns immediately + // with { jobID } now that the handler is async. 
+ const dispatchResults = await Promise.allSettled(ids.map(id => nodesApi.installBackend(id, effectiveBackendName) - .then(r => ({ id, ok: true, message: r?.message })) - .catch(err => ({ id, ok: false, error: err?.message || 'install failed' })) + .then(r => ({ id, ok: true, jobID: r?.jobID })) + .catch(err => ({ id, ok: false, error: err?.message || 'dispatch failed' })) )) - let successCount = 0, failCount = 0 + const jobs = [] setPerNode(prev => { const next = { ...prev } - for (const r of results) { + for (const r of dispatchResults) { if (r.status !== 'fulfilled') continue const v = r.value - if (v.ok) { - next[v.id] = { status: 'done' } - successCount++ + if (v.ok && v.jobID) { + next[v.id] = { status: 'installing', jobID: v.jobID } + jobs.push({ nodeID: v.id, jobID: v.jobID }) } else { - next[v.id] = { status: 'error', error: v.error } - failCount++ + next[v.id] = { status: 'error', error: v.error || 'dispatch failed' } } } return next }) + + // Phase 2: poll each job. Promise.all resolves when the last job settles; + // intermediate updates flip per-row state via the setPerNode inside pollJob. + await Promise.all(jobs.map(async ({ nodeID, jobID }) => { + const result = await pollJob(jobID) + setPerNode(prev => { + const next = { ...prev } + if (result.error) { + next[nodeID] = { status: 'error', error: result.error, jobID } + } else { + next[nodeID] = { status: 'done', jobID } + } + return next + }) + })) + + // Phase 3: summary toast + onComplete. Read latest state via functional setter. + let successCount = 0 + let failCount = 0 + setPerNode(prev => { + for (const v of Object.values(prev)) { + if (v.status === 'done') successCount++ + else if (v.status === 'error') failCount++ + } + return prev + }) + setSubmitting(false) if (successCount > 0 && onComplete) onComplete() - if (failCount === 0) { + if (failCount === 0 && successCount > 0) { addToast?.(`Installed on ${successCount} node${successCount === 1 ? 
'' : 's'}`, 'success') setTimeout(() => onClose?.(), 800) - } else if (successCount === 0) { + } else if (successCount === 0 && failCount > 0) { addToast?.(`Install failed on all ${failCount} node${failCount === 1 ? '' : 's'}`, 'error') - } else { + } else if (successCount > 0 && failCount > 0) { addToast?.(`Installed on ${successCount}, failed on ${failCount}`, 'warning') } } @@ -297,32 +356,58 @@ export default function NodeInstallPicker({ .map(([id]) => id) if (failedIds.length === 0) return setSelected(new Set(failedIds)) - // Replace state for failed rows so they show "installing" again, not stale errors. setPerNode(prev => { const next = { ...prev } failedIds.forEach(id => { next[id] = { status: 'installing' } }) return next }) setSubmitting(true) - const results = await Promise.allSettled(failedIds.map(id => + + const dispatchResults = await Promise.allSettled(failedIds.map(id => nodesApi.installBackend(id, effectiveBackendName) - .then(r => ({ id, ok: true, message: r?.message })) - .catch(err => ({ id, ok: false, error: err?.message || 'install failed' })) + .then(r => ({ id, ok: true, jobID: r?.jobID })) + .catch(err => ({ id, ok: false, error: err?.message || 'dispatch failed' })) )) - let successCount = 0, failCount = 0 + + const jobs = [] setPerNode(prev => { const next = { ...prev } - for (const r of results) { + for (const r of dispatchResults) { if (r.status !== 'fulfilled') continue const v = r.value - if (v.ok) { next[v.id] = { status: 'done' }; successCount++ } - else { next[v.id] = { status: 'error', error: v.error }; failCount++ } + if (v.ok && v.jobID) { + next[v.id] = { status: 'installing', jobID: v.jobID } + jobs.push({ nodeID: v.id, jobID: v.jobID }) + } else { + next[v.id] = { status: 'error', error: v.error || 'dispatch failed' } + } } return next }) + + await Promise.all(jobs.map(async ({ nodeID, jobID }) => { + const result = await pollJob(jobID) + setPerNode(prev => { + const next = { ...prev } + if (result.error) next[nodeID] = { 
status: 'error', error: result.error, jobID } + else next[nodeID] = { status: 'done', jobID } + return next + }) + })) + setSubmitting(false) + + let successCount = 0, failCount = 0 + setPerNode(prev => { + for (const id of failedIds) { + const v = prev[id] + if (v?.status === 'done') successCount++ + else if (v?.status === 'error') failCount++ + } + return prev + }) if (successCount > 0 && onComplete) onComplete() - if (failCount === 0) { + if (failCount === 0 && successCount > 0) { addToast?.(`Installed on ${successCount} node${successCount === 1 ? '' : 's'}`, 'success') setTimeout(() => onClose?.(), 800) } From ccd4e95d79c14ebf7d5ce9aa0d785a1375e069e8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:47:54 +0000 Subject: [PATCH 09/11] fix(react-ui): make NodeInstallPicker state updates pure and surface cancellations as errors Assisted-by: Claude:opus-4-7 [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- .../src/components/NodeInstallPicker.jsx | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/core/http/react-ui/src/components/NodeInstallPicker.jsx b/core/http/react-ui/src/components/NodeInstallPicker.jsx index b1b2920b9825..517d7156ce85 100644 --- a/core/http/react-ui/src/components/NodeInstallPicker.jsx +++ b/core/http/react-ui/src/components/NodeInstallPicker.jsx @@ -255,7 +255,7 @@ export default function NodeInstallPicker({ if (status?.completed) { resolve({ done: true }); return } if (status?.error) { resolve({ done: true, error: status.error }); return } if (status?.processed && !status?.completed) { - resolve({ done: true, error: status.error || (status.cancelled ? 
'cancelled' : '') }) + resolve({ done: true, error: status.error || 'install did not complete' }) return } } catch (err) { @@ -294,21 +294,23 @@ export default function NodeInstallPicker({ .catch(err => ({ id, ok: false, error: err?.message || 'dispatch failed' })) )) + // Classify dispatch results synchronously OUTSIDE the setter. React may + // invoke a functional state updater more than once (StrictMode dev double + // invoke, concurrent rendering replay): building the jobs array inside + // the closure would duplicate entries and re-poll the same job. const jobs = [] - setPerNode(prev => { - const next = { ...prev } - for (const r of dispatchResults) { - if (r.status !== 'fulfilled') continue - const v = r.value - if (v.ok && v.jobID) { - next[v.id] = { status: 'installing', jobID: v.jobID } - jobs.push({ nodeID: v.id, jobID: v.jobID }) - } else { - next[v.id] = { status: 'error', error: v.error || 'dispatch failed' } - } + const dispatchPatch = {} + for (const r of dispatchResults) { + if (r.status !== 'fulfilled') continue + const v = r.value + if (v.ok && v.jobID) { + dispatchPatch[v.id] = { status: 'installing', jobID: v.jobID } + jobs.push({ nodeID: v.id, jobID: v.jobID }) + } else { + dispatchPatch[v.id] = { status: 'error', error: v.error || 'dispatch failed' } } - return next - }) + } + setPerNode(prev => ({ ...prev, ...dispatchPatch })) // Phase 2: poll each job. Promise.all resolves when the last job settles; // intermediate updates flip per-row state via the setPerNode inside pollJob. @@ -369,21 +371,21 @@ export default function NodeInstallPicker({ .catch(err => ({ id, ok: false, error: err?.message || 'dispatch failed' })) )) + // Same precaution as in submit(): classify outside the functional setter + // so a replayed updater can't push duplicate jobs into the polling list. 
const jobs = [] - setPerNode(prev => { - const next = { ...prev } - for (const r of dispatchResults) { - if (r.status !== 'fulfilled') continue - const v = r.value - if (v.ok && v.jobID) { - next[v.id] = { status: 'installing', jobID: v.jobID } - jobs.push({ nodeID: v.id, jobID: v.jobID }) - } else { - next[v.id] = { status: 'error', error: v.error || 'dispatch failed' } - } + const dispatchPatch = {} + for (const r of dispatchResults) { + if (r.status !== 'fulfilled') continue + const v = r.value + if (v.ok && v.jobID) { + dispatchPatch[v.id] = { status: 'installing', jobID: v.jobID } + jobs.push({ nodeID: v.id, jobID: v.jobID }) + } else { + dispatchPatch[v.id] = { status: 'error', error: v.error || 'dispatch failed' } } - return next - }) + } + setPerNode(prev => ({ ...prev, ...dispatchPatch })) await Promise.all(jobs.map(async ({ nodeID, jobID }) => { const result = await pollJob(jobID) From a482132d8855e4f661c9bb223c730bd3d182ea9f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:49:20 +0000 Subject: [PATCH 10/11] refactor(react-ui): clarify async semantics in handleInstallOnTarget Assisted-by: Claude:opus-4-7 [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- core/http/react-ui/src/pages/Backends.jsx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/core/http/react-ui/src/pages/Backends.jsx b/core/http/react-ui/src/pages/Backends.jsx index 3c076b068781..53f1ef547c44 100644 --- a/core/http/react-ui/src/pages/Backends.jsx +++ b/core/http/react-ui/src/pages/Backends.jsx @@ -179,16 +179,19 @@ export default function Backends() { // Install a single gallery backend on a specific node, used in target-node // mode (the URL has ?target= set from the Nodes page entry point). + // The handler is async - we dispatch and let the global Operations panel + // surface progress; no need to await completion here. 
const handleInstallOnTarget = async (id) => { if (!targetNode) return try { await nodesApi.installBackend(targetNode.id, id) - addToast(`Installing ${id} on ${targetNode.name}…`, 'info') - // Per-node install is request-reply, not part of the global jobs feed — - // refetch to reflect the new Nodes column state. - setTimeout(() => { fetchBackends(); refetchNodes() }, 600) + addToast(`Installing ${id} on ${targetNode.name}...`, 'info') + // The install runs async via the gallery job queue. Refetch shortly so + // the Nodes column reflects "installing" state; the Operations panel + // tracks the actual progress until completion. + setTimeout(() => { fetchBackends(); refetchNodes() }, 1200) } catch (err) { - addToast(`Install failed on ${targetNode.name}: ${err.message}`, 'error') + addToast(`Install dispatch failed on ${targetNode.name}: ${err.message}`, 'error') } } From cbb20ae957fe33d441ffaf5179d5e6f8eb5124fc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 May 2026 19:55:00 +0000 Subject: [PATCH 11/11] refactor(http): use statusUrl casing for node install response to match codebase precedent Assisted-by: Claude:opus-4-7 [Edit] [Bash] Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/localai/nodes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/http/endpoints/localai/nodes.go b/core/http/endpoints/localai/nodes.go index fb69dcd8de8f..c27f0ed1d07d 100644 --- a/core/http/endpoints/localai/nodes.go +++ b/core/http/endpoints/localai/nodes.go @@ -470,7 +470,7 @@ func InstallBackendOnNodeEndpoint(_ nodes.NodeCommandSender, galleryService *gal xlog.Info("Node-scoped backend install dispatched", "node", nodeID, "backend", req.Backend, "uri", req.URI, "jobID", jobID) return c.JSON(http.StatusAccepted, map[string]string{ "jobID": jobID, - "statusURL": "/api/backends/job/" + jobID, + "statusUrl": "/api/backends/job/" + jobID, "message": "backend installation started", }) }