From 0741f669e0726f0c4f05ad4564e6e434a9af593a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 16:41:12 +0100 Subject: [PATCH 1/9] feat(ui): add watchdog settings Signed-off-by: Ettore Di Giacinto --- Dockerfile | 2 +- core/application/application.go | 63 ++++ core/application/config_file_watcher.go | 446 ++++++++++++++---------- core/application/startup.go | 154 +++++++- core/http/app.go | 2 +- core/http/endpoints/localai/settings.go | 279 +++++++++++++++ core/http/routes/ui.go | 9 + core/http/routes/ui_api.go | 8 +- core/http/views/manage.html | 6 + core/http/views/settings.html | 321 +++++++++++++++++ pkg/model/loader.go | 4 + 11 files changed, 1096 insertions(+), 198 deletions(-) create mode 100644 core/http/endpoints/localai/settings.go create mode 100644 core/http/views/settings.html diff --git a/Dockerfile b/Dockerfile index 151c73e161fc..7a705a9fc514 100644 --- a/Dockerfile +++ b/Dockerfile @@ -332,6 +332,6 @@ RUN mkdir -p /models /backends HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 -VOLUME /models /backends +VOLUME /models /backends /configuration EXPOSE 8080 ENTRYPOINT [ "/entrypoint.sh" ] diff --git a/core/application/application.go b/core/application/application.go index c852566d7a47..a4fcdacaa026 100644 --- a/core/application/application.go +++ b/core/application/application.go @@ -1,10 +1,14 @@ package application import ( + "sync" + "time" + "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/core/templates" "github.com/mudler/LocalAI/pkg/model" + "github.com/rs/zerolog/log" ) type Application struct { @@ -13,6 +17,8 @@ type Application struct { applicationConfig *config.ApplicationConfig templatesEvaluator *templates.Evaluator galleryService *services.GalleryService + watchdogMutex sync.Mutex + watchdogStop chan bool } func newApplication(appConfig *config.ApplicationConfig) *Application { @@ -44,6 +50,63 @@ func 
(a *Application) GalleryService() *services.GalleryService { return a.galleryService } +// RestartWatchdog restarts the watchdog with current ApplicationConfig settings +func (a *Application) RestartWatchdog() error { + a.watchdogMutex.Lock() + defer a.watchdogMutex.Unlock() + + appConfig := a.ApplicationConfig() + + // Shutdown existing watchdog if running + if a.watchdogStop != nil { + close(a.watchdogStop) + a.watchdogStop = nil + } + + // Shutdown existing watchdog if running + currentWD := a.modelLoader.GetWatchDog() + if currentWD != nil { + currentWD.Shutdown() + // Wait a bit for shutdown to complete + time.Sleep(100 * time.Millisecond) + } + + // Create new watchdog if enabled + if appConfig.WatchDog { + wd := model.NewWatchDog( + a.modelLoader, + appConfig.WatchDogBusyTimeout, + appConfig.WatchDogIdleTimeout, + appConfig.WatchDogBusy, + appConfig.WatchDogIdle) + a.modelLoader.SetWatchDog(wd) + + // Create new stop channel + a.watchdogStop = make(chan bool, 1) + + // Start watchdog goroutine + go wd.Run() + + // Setup shutdown handler + go func() { + select { + case <-a.watchdogStop: + log.Debug().Msg("Watchdog stop signal received") + wd.Shutdown() + case <-appConfig.Context.Done(): + log.Debug().Msg("Context canceled, shutting down watchdog") + wd.Shutdown() + } + }() + + log.Info().Msg("Watchdog restarted with new settings") + } else { + log.Info().Msg("Watchdog disabled") + } + + return nil +} + func (a *Application) start() error { galleryService := services.NewGalleryService(a.ApplicationConfig(), a.ModelLoader()) err := galleryService.Start(a.ApplicationConfig().Context, a.ModelConfigLoader(), a.ApplicationConfig().SystemState) diff --git a/core/application/config_file_watcher.go b/core/application/config_file_watcher.go index 46f29b101acb..ef922355eacf 100644 --- a/core/application/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -1,180 +1,266 @@ -package application - -import ( - "encoding/json" - "fmt" - "os" - "path" - 
"path/filepath" - "time" - - "dario.cat/mergo" - "github.com/fsnotify/fsnotify" - "github.com/mudler/LocalAI/core/config" - "github.com/rs/zerolog/log" -) - -type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error - -type configFileHandler struct { - handlers map[string]fileHandler - - watcher *fsnotify.Watcher - - appConfig *config.ApplicationConfig -} - -// TODO: This should be a singleton eventually so other parts of the code can register config file handlers, -// then we can export it to other packages -func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler { - c := configFileHandler{ - handlers: make(map[string]fileHandler), - appConfig: appConfig, - } - err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true) - if err != nil { - log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler") - } - err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true) - if err != nil { - log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler") - } - return c -} - -func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error { - _, ok := c.handlers[filename] - if ok { - return fmt.Errorf("handler already registered for file %s", filename) - } - c.handlers[filename] = handler - if runNow { - c.callHandler(filename, handler) - } - return nil -} - -func (c *configFileHandler) callHandler(filename string, handler fileHandler) { - rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename)) - log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update") - fileContent, err := os.ReadFile(rootedFilePath) - if err != nil && !os.IsNotExist(err) { - log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file") - } - - if err = handler(fileContent, c.appConfig); err != nil { - 
log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options") - } -} - -func (c *configFileHandler) Watch() error { - configWatcher, err := fsnotify.NewWatcher() - c.watcher = configWatcher - if err != nil { - return err - } - - if c.appConfig.DynamicConfigsDirPollInterval > 0 { - log.Debug().Msg("Poll interval set, falling back to polling for configuration changes") - ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval) - go func() { - for { - <-ticker.C - for file, handler := range c.handlers { - log.Debug().Str("file", file).Msg("polling config file") - c.callHandler(file, handler) - } - } - }() - } - - // Start listening for events. - go func() { - for { - select { - case event, ok := <-c.watcher.Events: - if !ok { - return - } - if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) { - handler, ok := c.handlers[path.Base(event.Name)] - if !ok { - continue - } - - c.callHandler(filepath.Base(event.Name), handler) - } - case err, ok := <-c.watcher.Errors: - log.Error().Err(err).Msg("config watcher error received") - if !ok { - return - } - } - } - }() - - // Add a path. 
- err = c.watcher.Add(c.appConfig.DynamicConfigsDir) - if err != nil { - return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err) - } - - return nil -} - -// TODO: When we institute graceful shutdown, this should be called -func (c *configFileHandler) Stop() error { - return c.watcher.Close() -} - -func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler { - handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { - log.Debug().Msg("processing api keys runtime update") - log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup") - - if len(fileContent) > 0 { - // Parse JSON content from the file - var fileKeys []string - err := json.Unmarshal(fileContent, &fileKeys) - if err != nil { - return err - } - - log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile") - - appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...) - } else { - log.Trace().Msg("no API keys discovered from dynamic config file") - appConfig.ApiKeys = startupAppConfig.ApiKeys - } - log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing") - return nil - } - - return handler -} - -func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler { - handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { - log.Debug().Msg("processing external_backends.json") - - if len(fileContent) > 0 { - // Parse JSON content from the file - var fileBackends map[string]string - err := json.Unmarshal(fileContent, &fileBackends) - if err != nil { - return err - } - appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends - err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends) - if err != nil { - return err - } - } else { - appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends - } - log.Debug().Msg("external backends loaded from 
external_backends.json") - return nil - } - return handler -} +package application + +import ( + "encoding/json" + "fmt" + "os" + "path" + "path/filepath" + "time" + + "dario.cat/mergo" + "github.com/fsnotify/fsnotify" + "github.com/mudler/LocalAI/core/config" + "github.com/rs/zerolog/log" +) + +type fileHandler func(fileContent []byte, appConfig *config.ApplicationConfig) error + +type configFileHandler struct { + handlers map[string]fileHandler + + watcher *fsnotify.Watcher + + appConfig *config.ApplicationConfig +} + +// TODO: This should be a singleton eventually so other parts of the code can register config file handlers, +// then we can export it to other packages +func newConfigFileHandler(appConfig *config.ApplicationConfig) configFileHandler { + c := configFileHandler{ + handlers: make(map[string]fileHandler), + appConfig: appConfig, + } + err := c.Register("api_keys.json", readApiKeysJson(*appConfig), true) + if err != nil { + log.Error().Err(err).Str("file", "api_keys.json").Msg("unable to register config file handler") + } + err = c.Register("external_backends.json", readExternalBackendsJson(*appConfig), true) + if err != nil { + log.Error().Err(err).Str("file", "external_backends.json").Msg("unable to register config file handler") + } + err = c.Register("runtime_settings.json", readRuntimeSettingsJson(*appConfig), true) + if err != nil { + log.Error().Err(err).Str("file", "runtime_settings.json").Msg("unable to register config file handler") + } + return c +} + +func (c *configFileHandler) Register(filename string, handler fileHandler, runNow bool) error { + _, ok := c.handlers[filename] + if ok { + return fmt.Errorf("handler already registered for file %s", filename) + } + c.handlers[filename] = handler + if runNow { + c.callHandler(filename, handler) + } + return nil +} + +func (c *configFileHandler) callHandler(filename string, handler fileHandler) { + rootedFilePath := filepath.Join(c.appConfig.DynamicConfigsDir, filepath.Clean(filename)) + 
log.Trace().Str("filename", rootedFilePath).Msg("reading file for dynamic config update") + fileContent, err := os.ReadFile(rootedFilePath) + if err != nil && !os.IsNotExist(err) { + log.Error().Err(err).Str("filename", rootedFilePath).Msg("could not read file") + } + + if err = handler(fileContent, c.appConfig); err != nil { + log.Error().Err(err).Msg("WatchConfigDirectory goroutine failed to update options") + } +} + +func (c *configFileHandler) Watch() error { + configWatcher, err := fsnotify.NewWatcher() + c.watcher = configWatcher + if err != nil { + return err + } + + if c.appConfig.DynamicConfigsDirPollInterval > 0 { + log.Debug().Msg("Poll interval set, falling back to polling for configuration changes") + ticker := time.NewTicker(c.appConfig.DynamicConfigsDirPollInterval) + go func() { + for { + <-ticker.C + for file, handler := range c.handlers { + log.Debug().Str("file", file).Msg("polling config file") + c.callHandler(file, handler) + } + } + }() + } + + // Start listening for events. + go func() { + for { + select { + case event, ok := <-c.watcher.Events: + if !ok { + return + } + if event.Has(fsnotify.Write | fsnotify.Create | fsnotify.Remove) { + handler, ok := c.handlers[path.Base(event.Name)] + if !ok { + continue + } + + c.callHandler(filepath.Base(event.Name), handler) + } + case err, ok := <-c.watcher.Errors: + log.Error().Err(err).Msg("config watcher error received") + if !ok { + return + } + } + } + }() + + // Add a path. 
+ err = c.watcher.Add(c.appConfig.DynamicConfigsDir) + if err != nil { + return fmt.Errorf("unable to create a watcher on the configuration directory: %+v", err) + } + + return nil +} + +// TODO: When we institute graceful shutdown, this should be called +func (c *configFileHandler) Stop() error { + return c.watcher.Close() +} + +func readApiKeysJson(startupAppConfig config.ApplicationConfig) fileHandler { + handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { + log.Debug().Msg("processing api keys runtime update") + log.Trace().Int("numKeys", len(startupAppConfig.ApiKeys)).Msg("api keys provided at startup") + + if len(fileContent) > 0 { + // Parse JSON content from the file + var fileKeys []string + err := json.Unmarshal(fileContent, &fileKeys) + if err != nil { + return err + } + + log.Trace().Int("numKeys", len(fileKeys)).Msg("discovered API keys from api keys dynamic config dile") + + appConfig.ApiKeys = append(startupAppConfig.ApiKeys, fileKeys...) + } else { + log.Trace().Msg("no API keys discovered from dynamic config file") + appConfig.ApiKeys = startupAppConfig.ApiKeys + } + log.Trace().Int("numKeys", len(appConfig.ApiKeys)).Msg("total api keys after processing") + return nil + } + + return handler +} + +func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHandler { + handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { + log.Debug().Msg("processing external_backends.json") + + if len(fileContent) > 0 { + // Parse JSON content from the file + var fileBackends map[string]string + err := json.Unmarshal(fileContent, &fileBackends) + if err != nil { + return err + } + appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends + err = mergo.Merge(&appConfig.ExternalGRPCBackends, &fileBackends) + if err != nil { + return err + } + } else { + appConfig.ExternalGRPCBackends = startupAppConfig.ExternalGRPCBackends + } + log.Debug().Msg("external backends loaded from 
external_backends.json") + return nil + } + return handler +} + +type runtimeSettings struct { + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + SingleBackend *bool `json:"single_backend,omitempty"` + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` +} + +func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler { + handler := func(fileContent []byte, appConfig *config.ApplicationConfig) error { + log.Debug().Msg("processing runtime_settings.json") + + // Determine if settings came from env vars by comparing with startup config + // startupAppConfig contains the original values set from env vars at startup. + // If current values match startup values and are non-default, they came from env vars. + // We apply file settings only if current values match startup defaults (false/0), + // which suggests they weren't set from env vars. 
+ envWatchdogIdle := appConfig.WatchDogIdle == startupAppConfig.WatchDogIdle && startupAppConfig.WatchDogIdle + envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy && startupAppConfig.WatchDogBusy + envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout && startupAppConfig.WatchDogIdleTimeout > 0 + envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout && startupAppConfig.WatchDogBusyTimeout > 0 + envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend && startupAppConfig.SingleBackend + envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests && startupAppConfig.ParallelBackendRequests + + if len(fileContent) > 0 { + var settings runtimeSettings + err := json.Unmarshal(fileContent, &settings) + if err != nil { + return err + } + + // Apply file settings only if they don't match startup non-default values (i.e., not from env vars) + if settings.WatchdogIdleEnabled != nil && !envWatchdogIdle { + appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled + if appConfig.WatchDogIdle { + appConfig.WatchDog = true + } + } + if settings.WatchdogBusyEnabled != nil && !envWatchdogBusy { + appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled + if appConfig.WatchDogBusy { + appConfig.WatchDog = true + } + } + if settings.WatchdogIdleTimeout != nil && !envWatchdogIdleTimeout { + dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout) + if err == nil { + appConfig.WatchDogIdleTimeout = dur + } else { + log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json") + } + } + if settings.WatchdogBusyTimeout != nil && !envWatchdogBusyTimeout { + dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout) + if err == nil { + appConfig.WatchDogBusyTimeout = dur + } else { + log.Warn().Err(err).Str("timeout", 
*settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json") + } + } + if settings.SingleBackend != nil && !envSingleBackend { + appConfig.SingleBackend = *settings.SingleBackend + } + if settings.ParallelBackendRequests != nil && !envParallelRequests { + appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests + } + + // If watchdog is enabled via file but not via env, ensure WatchDog flag is set + if !envWatchdogIdle && !envWatchdogBusy { + if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled { + appConfig.WatchDog = true + } + } + } + log.Debug().Msg("runtime settings loaded from runtime_settings.json") + return nil + } + return handler +} diff --git a/core/application/startup.go b/core/application/startup.go index eb387d06debd..60643b33f83a 100644 --- a/core/application/startup.go +++ b/core/application/startup.go @@ -1,8 +1,11 @@ package application import ( + "encoding/json" "fmt" "os" + "path/filepath" + "time" "github.com/mudler/LocalAI/core/backend" "github.com/mudler/LocalAI/core/config" @@ -110,6 +113,12 @@ func New(opts ...config.AppOption) (*Application, error) { } } + // Load runtime settings from file if DynamicConfigsDir is set + // This applies file settings with env var precedence (env vars take priority) + if options.DynamicConfigsDir != "" { + loadRuntimeSettingsFromFile(options) + } + // turn off any process that was started by GRPC if the context is canceled go func() { <-options.Context.Done() @@ -120,21 +129,8 @@ func New(opts ...config.AppOption) (*Application, error) { } }() - if options.WatchDog { - wd := model.NewWatchDog( - application.ModelLoader(), - options.WatchDogBusyTimeout, - options.WatchDogIdleTimeout, - options.WatchDogBusy, - options.WatchDogIdle) - application.ModelLoader().SetWatchDog(wd) - go wd.Run() - go func() { - <-options.Context.Done() - log.Debug().Msgf("Context canceled, shutting down") - wd.Shutdown() - }() - } + // Initialize watchdog with current settings 
(after loading from file) + initializeWatchdog(application, options) if options.LoadToMemory != nil && !options.SingleBackend { for _, m := range options.LoadToMemory { @@ -186,3 +182,131 @@ func startWatcher(options *config.ApplicationConfig) { log.Error().Err(err).Msg("failed creating watcher") } } + +// loadRuntimeSettingsFromFile applies runtime_settings.json on top of the startup options. +// +// It is called from New() once the ApplicationConfig has been built and before +// initializeWatchdog(), so values coming from env vars (via AppOptions) are already +// present in options when the file is read. +// +// Env vars take precedence over the file. There is no record of which options were +// set explicitly, so precedence is approximated: a file value is applied only while +// the corresponding option still holds its zero value (false for bools, 0 for +// durations). A non-zero option is assumed to come from an env var and is kept. +// Limitation: an env var that explicitly sets false/0 cannot be told apart from an +// unset one, so the file value wins in that case. +// +// A missing file is not an error; read and parse failures are logged as warnings and +// leave options untouched. +// Changes made to the file after startup are handled by the runtime_settings.json +// handler registered on the config file watcher, which compares against the startup config.
+func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) { + settingsFile := filepath.Join(options.DynamicConfigsDir, "runtime_settings.json") + fileContent, err := os.ReadFile(settingsFile) + if err != nil { + if os.IsNotExist(err) { + log.Debug().Msg("runtime_settings.json not found, using defaults") + return + } + log.Warn().Err(err).Msg("failed to read runtime_settings.json") + return + } + + var settings struct { + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + SingleBackend *bool `json:"single_backend,omitempty"` + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` + } + + if err := json.Unmarshal(fileContent, &settings); err != nil { + log.Warn().Err(err).Msg("failed to parse runtime_settings.json") + return + } + + // At this point, options already has values from env vars (via AppOptions in run.go). + // To avoid env var duplication, we determine if env vars were set by checking if + // current values differ from defaults. Defaults are: false for bools, 0 for durations. + // If current value is at default, it likely wasn't set from env var, so we can apply file. + // If current value is non-default, it was likely set from env var, so we preserve it. + // Note: This means env vars explicitly setting to false/0 won't be distinguishable from defaults, + // but that's an acceptable limitation to avoid env var duplication. 
+ + if settings.WatchdogIdleEnabled != nil { + // Only apply if current value is default (false), suggesting it wasn't set from env var + if !options.WatchDogIdle { + options.WatchDogIdle = *settings.WatchdogIdleEnabled + if options.WatchDogIdle { + options.WatchDog = true + } + } + } + if settings.WatchdogBusyEnabled != nil { + if !options.WatchDogBusy { + options.WatchDogBusy = *settings.WatchdogBusyEnabled + if options.WatchDogBusy { + options.WatchDog = true + } + } + } + if settings.WatchdogIdleTimeout != nil { + // Only apply if current value is default (0), suggesting it wasn't set from env var + if options.WatchDogIdleTimeout == 0 { + dur, err := time.ParseDuration(*settings.WatchdogIdleTimeout) + if err == nil { + options.WatchDogIdleTimeout = dur + } else { + log.Warn().Err(err).Str("timeout", *settings.WatchdogIdleTimeout).Msg("invalid watchdog idle timeout in runtime_settings.json") + } + } + } + if settings.WatchdogBusyTimeout != nil { + if options.WatchDogBusyTimeout == 0 { + dur, err := time.ParseDuration(*settings.WatchdogBusyTimeout) + if err == nil { + options.WatchDogBusyTimeout = dur + } else { + log.Warn().Err(err).Str("timeout", *settings.WatchdogBusyTimeout).Msg("invalid watchdog busy timeout in runtime_settings.json") + } + } + } + if settings.SingleBackend != nil { + if !options.SingleBackend { + options.SingleBackend = *settings.SingleBackend + } + } + if settings.ParallelBackendRequests != nil { + if !options.ParallelBackendRequests { + options.ParallelBackendRequests = *settings.ParallelBackendRequests + } + } + if !options.WatchDogIdle && !options.WatchDogBusy { + if settings.WatchdogEnabled != nil && *settings.WatchdogEnabled { + options.WatchDog = true + } + } + + log.Debug().Msg("Runtime settings loaded from runtime_settings.json") +} + +// initializeWatchdog initializes the watchdog with current ApplicationConfig settings +func initializeWatchdog(application *Application, options *config.ApplicationConfig) { + if options.WatchDog { 
+ wd := model.NewWatchDog( + application.ModelLoader(), + options.WatchDogBusyTimeout, + options.WatchDogIdleTimeout, + options.WatchDogBusy, + options.WatchDogIdle) + application.ModelLoader().SetWatchDog(wd) + go wd.Run() + go func() { + <-options.Context.Done() + log.Debug().Msgf("Context canceled, shutting down") + wd.Shutdown() + }() + } +} diff --git a/core/http/app.go b/core/http/app.go index cec25fc6c1f1..a5ce91e42566 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -208,7 +208,7 @@ func API(application *application.Application) (*echo.Echo, error) { routes.RegisterLocalAIRoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application.TemplatesEvaluator()) routes.RegisterOpenAIRoutes(e, requestExtractor, application) if !application.ApplicationConfig().DisableWebUI { - routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache) + routes.RegisterUIAPIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService(), opcache, application) routes.RegisterUIRoutes(e, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig(), application.GalleryService()) } routes.RegisterJINARoutes(e, requestExtractor, application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig()) diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go new file mode 100644 index 000000000000..5621affb0458 --- /dev/null +++ b/core/http/endpoints/localai/settings.go @@ -0,0 +1,279 @@ +package localai + +import ( + "encoding/json" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/application" + "github.com/rs/zerolog/log" +) + +type 
SettingsResponse struct { + Success bool `json:"success"` + Error string `json:"error,omitempty"` + Message string `json:"message,omitempty"` +} + +type RuntimeSettings struct { + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + SingleBackend *bool `json:"single_backend,omitempty"` + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` +} + +type CurrentSettings struct { + WatchdogEnabled bool `json:"watchdog_enabled"` + WatchdogIdleEnabled bool `json:"watchdog_idle_enabled"` + WatchdogBusyEnabled bool `json:"watchdog_busy_enabled"` + WatchdogIdleTimeout string `json:"watchdog_idle_timeout"` + WatchdogBusyTimeout string `json:"watchdog_busy_timeout"` + SingleBackend bool `json:"single_backend"` + ParallelBackendRequests bool `json:"parallel_backend_requests"` + Source string `json:"source"` // "env", "file", or "default" +} + +// getEnvVarWithPrecedence checks multiple env var names and returns the first one found +func getEnvVarWithPrecedence(names ...string) string { + for _, name := range names { + if val := os.Getenv(name); val != "" { + return val + } + } + return "" +} + +// getBoolEnvVar returns true if env var is set to "true", "1", "yes", or "on" +func getBoolEnvVar(names ...string) (bool, bool) { + val := getEnvVarWithPrecedence(names...) 
+ if val == "" { + return false, false + } + val = strings.ToLower(val) + return val == "true" || val == "1" || val == "yes" || val == "on", true +} + +// GetSettingsEndpoint returns current settings with precedence (env > file > defaults) +func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { + return func(c echo.Context) error { + appConfig := app.ApplicationConfig() + + // Check env vars first + envWatchdogIdle, envWatchdogIdleSet := getBoolEnvVar("LOCALAI_WATCHDOG_IDLE", "WATCHDOG_IDLE") + envWatchdogBusy, envWatchdogBusySet := getBoolEnvVar("LOCALAI_WATCHDOG_BUSY", "WATCHDOG_BUSY") + envWatchdogIdleTimeout := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_IDLE_TIMEOUT", "WATCHDOG_IDLE_TIMEOUT") + envWatchdogBusyTimeout := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_BUSY_TIMEOUT", "WATCHDOG_BUSY_TIMEOUT") + envSingleBackend, envSingleBackendSet := getBoolEnvVar("LOCALAI_SINGLE_ACTIVE_BACKEND", "SINGLE_ACTIVE_BACKEND") + envParallelRequests, envParallelRequestsSet := getBoolEnvVar("LOCALAI_PARALLEL_REQUESTS", "PARALLEL_REQUESTS") + + settings := CurrentSettings{} + + // Determine source and values + if envWatchdogIdleSet || envWatchdogBusySet { + settings.WatchdogIdleEnabled = envWatchdogIdle + settings.WatchdogBusyEnabled = envWatchdogBusy + settings.WatchdogEnabled = envWatchdogIdle || envWatchdogBusy + settings.Source = "env" + } else { + settings.WatchdogIdleEnabled = appConfig.WatchDogIdle + settings.WatchdogBusyEnabled = appConfig.WatchDogBusy + settings.WatchdogEnabled = appConfig.WatchDog + settings.Source = "file" + } + + if envWatchdogIdleTimeout != "" { + settings.WatchdogIdleTimeout = envWatchdogIdleTimeout + if settings.Source == "file" { + settings.Source = "env" + } + } else { + if appConfig.WatchDogIdleTimeout > 0 { + settings.WatchdogIdleTimeout = appConfig.WatchDogIdleTimeout.String() + } else { + settings.WatchdogIdleTimeout = "15m" // default + } + } + + if envWatchdogBusyTimeout != "" { + settings.WatchdogBusyTimeout = 
envWatchdogBusyTimeout + if settings.Source == "file" { + settings.Source = "env" + } + } else { + if appConfig.WatchDogBusyTimeout > 0 { + settings.WatchdogBusyTimeout = appConfig.WatchDogBusyTimeout.String() + } else { + settings.WatchdogBusyTimeout = "5m" // default + } + } + + if envSingleBackendSet { + settings.SingleBackend = envSingleBackend + if settings.Source == "file" { + settings.Source = "env" + } + } else { + settings.SingleBackend = appConfig.SingleBackend + } + + if envParallelRequestsSet { + settings.ParallelBackendRequests = envParallelRequests + if settings.Source == "file" { + settings.Source = "env" + } + } else { + settings.ParallelBackendRequests = appConfig.ParallelBackendRequests + } + + // If no env vars set and no file values, use defaults + if settings.Source == "file" && !appConfig.WatchDog && !appConfig.SingleBackend && !appConfig.ParallelBackendRequests { + settings.Source = "default" + } + + return c.JSON(http.StatusOK, settings) + } +} + +// UpdateSettingsEndpoint updates settings, saves to file, and applies immediately +func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { + return func(c echo.Context) error { + appConfig := app.ApplicationConfig() + + // Check if env vars are set - if so, reject the update + envWatchdogIdleSet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_IDLE", "WATCHDOG_IDLE") != "" + envWatchdogBusySet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_BUSY", "WATCHDOG_BUSY") != "" + envWatchdogIdleTimeoutSet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_IDLE_TIMEOUT", "WATCHDOG_IDLE_TIMEOUT") != "" + envWatchdogBusyTimeoutSet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_BUSY_TIMEOUT", "WATCHDOG_BUSY_TIMEOUT") != "" + envSingleBackendSet := getEnvVarWithPrecedence("LOCALAI_SINGLE_ACTIVE_BACKEND", "SINGLE_ACTIVE_BACKEND") != "" + envParallelRequestsSet := getEnvVarWithPrecedence("LOCALAI_PARALLEL_REQUESTS", "PARALLEL_REQUESTS") != "" + + if envWatchdogIdleSet || envWatchdogBusySet || 
envWatchdogIdleTimeoutSet || envWatchdogBusyTimeoutSet || envSingleBackendSet || envParallelRequestsSet { + return c.JSON(http.StatusBadRequest, SettingsResponse{ + Success: false, + Error: "Cannot update settings: environment variables are set and take precedence. Please unset environment variables first.", + }) + } + + body, err := io.ReadAll(c.Request().Body) + if err != nil { + return c.JSON(http.StatusBadRequest, SettingsResponse{ + Success: false, + Error: "Failed to read request body: " + err.Error(), + }) + } + + var settings RuntimeSettings + if err := json.Unmarshal(body, &settings); err != nil { + return c.JSON(http.StatusBadRequest, SettingsResponse{ + Success: false, + Error: "Failed to parse JSON: " + err.Error(), + }) + } + + // Validate timeouts if provided + if settings.WatchdogIdleTimeout != nil { + _, err := time.ParseDuration(*settings.WatchdogIdleTimeout) + if err != nil { + return c.JSON(http.StatusBadRequest, SettingsResponse{ + Success: false, + Error: "Invalid watchdog_idle_timeout format: " + err.Error(), + }) + } + } + if settings.WatchdogBusyTimeout != nil { + _, err := time.ParseDuration(*settings.WatchdogBusyTimeout) + if err != nil { + return c.JSON(http.StatusBadRequest, SettingsResponse{ + Success: false, + Error: "Invalid watchdog_busy_timeout format: " + err.Error(), + }) + } + } + + // Save to file + if appConfig.DynamicConfigsDir == "" { + return c.JSON(http.StatusBadRequest, SettingsResponse{ + Success: false, + Error: "DynamicConfigsDir is not set", + }) + } + + settingsFile := filepath.Join(appConfig.DynamicConfigsDir, "runtime_settings.json") + settingsJSON, err := json.MarshalIndent(settings, "", " ") + if err != nil { + return c.JSON(http.StatusInternalServerError, SettingsResponse{ + Success: false, + Error: "Failed to marshal settings: " + err.Error(), + }) + } + + if err := os.WriteFile(settingsFile, settingsJSON, 0600); err != nil { + return c.JSON(http.StatusInternalServerError, SettingsResponse{ + Success: false, + 
Error: "Failed to write settings file: " + err.Error(), + }) + } + + // Apply settings immediately + watchdogChanged := false + if settings.WatchdogEnabled != nil { + appConfig.WatchDog = *settings.WatchdogEnabled + watchdogChanged = true + } + if settings.WatchdogIdleEnabled != nil { + appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled + if appConfig.WatchDogIdle { + appConfig.WatchDog = true + } + watchdogChanged = true + } + if settings.WatchdogBusyEnabled != nil { + appConfig.WatchDogBusy = *settings.WatchdogBusyEnabled + if appConfig.WatchDogBusy { + appConfig.WatchDog = true + } + watchdogChanged = true + } + if settings.WatchdogIdleTimeout != nil { + dur, _ := time.ParseDuration(*settings.WatchdogIdleTimeout) + appConfig.WatchDogIdleTimeout = dur + watchdogChanged = true + } + if settings.WatchdogBusyTimeout != nil { + dur, _ := time.ParseDuration(*settings.WatchdogBusyTimeout) + appConfig.WatchDogBusyTimeout = dur + watchdogChanged = true + } + if settings.SingleBackend != nil { + appConfig.SingleBackend = *settings.SingleBackend + } + if settings.ParallelBackendRequests != nil { + appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests + } + + // Restart watchdog if settings changed + if watchdogChanged { + if err := app.RestartWatchdog(); err != nil { + log.Error().Err(err).Msg("Failed to restart watchdog") + return c.JSON(http.StatusInternalServerError, SettingsResponse{ + Success: false, + Error: "Settings saved but failed to restart watchdog: " + err.Error(), + }) + } + } + + return c.JSON(http.StatusOK, SettingsResponse{ + Success: true, + Message: "Settings updated successfully", + }) + } +} + diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 776547e5c979..de329c11de06 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -22,6 +22,15 @@ func RegisterUIRoutes(app *echo.Echo, app.GET("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps)) app.GET("/manage", localai.WelcomeEndpoint(appConfig, cl, 
ml, processingOps)) + + // Settings page + app.GET("/settings", func(c echo.Context) error { + summary := map[string]interface{}{ + "Title": "LocalAI - Settings", + "BaseURL": middleware.BaseURL(c), + } + return c.Render(200, "views/settings", summary) + }) // P2P app.GET("/p2p/", func(c echo.Context) error { diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index b6c8c67cc7ea..cd56c0e5c3ee 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -12,8 +12,10 @@ import ( "github.com/google/uuid" "github.com/labstack/echo/v4" + "github.com/mudler/LocalAI/core/application" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/gallery" + "github.com/mudler/LocalAI/core/http/endpoints/localai" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" "github.com/mudler/LocalAI/pkg/model" @@ -21,7 +23,7 @@ import ( ) // RegisterUIAPIRoutes registers JSON API routes for the web UI -func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache) { +func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig, galleryService *services.GalleryService, opcache *services.OpCache, applicationInstance *application.Application) { // Operations API - Get all current operations (models + backends) app.GET("/api/operations", func(c echo.Context) error { @@ -802,4 +804,8 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }, }) }) + + // Settings API + app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance)) + app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance)) } diff --git a/core/http/views/manage.html b/core/http/views/manage.html index c4c583853aed..3a460dff639e 100644 --- a/core/http/views/manage.html +++ 
b/core/http/views/manage.html @@ -66,6 +66,12 @@

Backend Gallery + + + + Settings + diff --git a/core/http/views/settings.html b/core/http/views/settings.html new file mode 100644 index 000000000000..527c8b09c922 --- /dev/null +++ b/core/http/views/settings.html @@ -0,0 +1,321 @@ + + +{{template "views/partials/head" .}} + + +
+ + {{template "views/partials/navbar" .}} + + +
+ +
+ +
+ +
+
+

+ Application Settings +

+ + + Back to Manage + +
+

Configure watchdog and backend request settings

+
+ + +
+ +
+

+ + Watchdog Settings +

+

+ Configure automatic monitoring and management of backend processes +

+ +
+ +
+
+ +

Enable automatic monitoring of backend processes

+
+ +
+ + +
+
+ +

Automatically stop backends that are idle for too long

+
+ +
+ + +
+ +

Time before an idle backend is stopped (e.g., 15m, 1h)

+ +
+ + +
+
+ +

Automatically stop backends that are busy for too long (stuck processes)

+
+ +
+ + +
+ +

Time before a busy backend is stopped (e.g., 5m, 30m)

+ +
+
+
+ + +
+

+ + Backend Request Settings +

+

+ Configure how backends handle multiple requests +

+ +
+ +
+
+ +

Allow only one backend to be active at a time

+
+ +
+ + +
+
+ +

Enable backends to handle multiple requests in parallel (if supported)

+
+ +
+
+
+ + +
+
+ +
+

Configuration Source

+

+

+ Environment variables take precedence. To modify settings via the UI, unset the relevant environment variables first. +

+
+
+
+ + +
+ +
+
+
+ + {{template "views/partials/footer" .}} +
+ + + + + + diff --git a/pkg/model/loader.go b/pkg/model/loader.go index 0851d3cac113..2ef6ccddcb11 100644 --- a/pkg/model/loader.go +++ b/pkg/model/loader.go @@ -44,6 +44,10 @@ func (ml *ModelLoader) SetWatchDog(wd *WatchDog) { ml.wd = wd } +func (ml *ModelLoader) GetWatchDog() *WatchDog { + return ml.wd +} + func (ml *ModelLoader) ExistsInModelPath(s string) bool { return utils.ExistsInPath(ml.ModelPath, s) } From 21c1b82040ec9b58ae9e0b97087d0951ff624aa7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 16:56:31 +0100 Subject: [PATCH 2/9] Do not re-read env Signed-off-by: Ettore Di Giacinto --- core/application/application.go | 6 + core/application/startup.go | 6 + core/http/endpoints/localai/settings.go | 145 +++++++++--------------- 3 files changed, 67 insertions(+), 90 deletions(-) diff --git a/core/application/application.go b/core/application/application.go index a4fcdacaa026..601cfb89cef0 100644 --- a/core/application/application.go +++ b/core/application/application.go @@ -15,6 +15,7 @@ type Application struct { backendLoader *config.ModelConfigLoader modelLoader *model.ModelLoader applicationConfig *config.ApplicationConfig + startupConfig *config.ApplicationConfig // Stores original config from env vars (before file loading) templatesEvaluator *templates.Evaluator galleryService *services.GalleryService watchdogMutex sync.Mutex @@ -50,6 +51,11 @@ func (a *Application) GalleryService() *services.GalleryService { return a.galleryService } +// StartupConfig returns the original startup configuration (from env vars, before file loading) +func (a *Application) StartupConfig() *config.ApplicationConfig { + return a.startupConfig +} + // RestartWatchdog restarts the watchdog with current ApplicationConfig settings func (a *Application) RestartWatchdog() error { a.watchdogMutex.Lock() diff --git a/core/application/startup.go b/core/application/startup.go index 60643b33f83a..6186424e5c4f 100644 --- a/core/application/startup.go +++ 
b/core/application/startup.go @@ -21,7 +21,12 @@ import ( func New(opts ...config.AppOption) (*Application, error) { options := config.NewApplicationConfig(opts...) + + // Store a copy of the startup config (from env vars, before file loading) + // This is used to determine if settings came from env vars vs file + startupConfigCopy := *options application := newApplication(options) + application.startupConfig = &startupConfigCopy log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.SystemState.Model.ModelsPath) log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion()) @@ -115,6 +120,7 @@ func New(opts ...config.AppOption) (*Application, error) { // Load runtime settings from file if DynamicConfigsDir is set // This applies file settings with env var precedence (env vars take priority) + // Note: startupConfigCopy was already created above, so it has the original env var values if options.DynamicConfigsDir != "" { loadRuntimeSettingsFromFile(options) } diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go index 5621affb0458..fe0717cefd7c 100644 --- a/core/http/endpoints/localai/settings.go +++ b/core/http/endpoints/localai/settings.go @@ -6,7 +6,6 @@ import ( "net/http" "os" "path/filepath" - "strings" "time" "github.com/labstack/echo/v4" @@ -24,7 +23,7 @@ type RuntimeSettings struct { WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` SingleBackend *bool `json:"single_backend,omitempty"` ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` @@ -34,108 +33,67 @@ type CurrentSettings struct { 
WatchdogEnabled bool `json:"watchdog_enabled"` WatchdogIdleEnabled bool `json:"watchdog_idle_enabled"` WatchdogBusyEnabled bool `json:"watchdog_busy_enabled"` - WatchdogIdleTimeout string `json:"watchdog_idle_timeout"` + WatchdogIdleTimeout string `json:"watchdog_idle_timeout"` WatchdogBusyTimeout string `json:"watchdog_busy_timeout"` SingleBackend bool `json:"single_backend"` ParallelBackendRequests bool `json:"parallel_backend_requests"` Source string `json:"source"` // "env", "file", or "default" } -// getEnvVarWithPrecedence checks multiple env var names and returns the first one found -func getEnvVarWithPrecedence(names ...string) string { - for _, name := range names { - if val := os.Getenv(name); val != "" { - return val - } - } - return "" -} - -// getBoolEnvVar returns true if env var is set to "true", "1", "yes", or "on" -func getBoolEnvVar(names ...string) (bool, bool) { - val := getEnvVarWithPrecedence(names...) - if val == "" { - return false, false - } - val = strings.ToLower(val) - return val == "true" || val == "1" || val == "yes" || val == "on", true -} - // GetSettingsEndpoint returns current settings with precedence (env > file > defaults) func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { appConfig := app.ApplicationConfig() - - // Check env vars first - envWatchdogIdle, envWatchdogIdleSet := getBoolEnvVar("LOCALAI_WATCHDOG_IDLE", "WATCHDOG_IDLE") - envWatchdogBusy, envWatchdogBusySet := getBoolEnvVar("LOCALAI_WATCHDOG_BUSY", "WATCHDOG_BUSY") - envWatchdogIdleTimeout := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_IDLE_TIMEOUT", "WATCHDOG_IDLE_TIMEOUT") - envWatchdogBusyTimeout := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_BUSY_TIMEOUT", "WATCHDOG_BUSY_TIMEOUT") - envSingleBackend, envSingleBackendSet := getBoolEnvVar("LOCALAI_SINGLE_ACTIVE_BACKEND", "SINGLE_ACTIVE_BACKEND") - envParallelRequests, envParallelRequestsSet := getBoolEnvVar("LOCALAI_PARALLEL_REQUESTS", "PARALLEL_REQUESTS") - - 
settings := CurrentSettings{} + startupConfig := app.StartupConfig() - // Determine source and values - if envWatchdogIdleSet || envWatchdogBusySet { - settings.WatchdogIdleEnabled = envWatchdogIdle - settings.WatchdogBusyEnabled = envWatchdogBusy - settings.WatchdogEnabled = envWatchdogIdle || envWatchdogBusy - settings.Source = "env" - } else { - settings.WatchdogIdleEnabled = appConfig.WatchDogIdle - settings.WatchdogBusyEnabled = appConfig.WatchDogBusy - settings.WatchdogEnabled = appConfig.WatchDog - settings.Source = "file" + if startupConfig == nil { + // Fallback if startup config not available + startupConfig = appConfig } - if envWatchdogIdleTimeout != "" { - settings.WatchdogIdleTimeout = envWatchdogIdleTimeout - if settings.Source == "file" { - settings.Source = "env" - } - } else { - if appConfig.WatchDogIdleTimeout > 0 { - settings.WatchdogIdleTimeout = appConfig.WatchDogIdleTimeout.String() - } else { - settings.WatchdogIdleTimeout = "15m" // default - } - } + settings := CurrentSettings{} - if envWatchdogBusyTimeout != "" { - settings.WatchdogBusyTimeout = envWatchdogBusyTimeout - if settings.Source == "file" { - settings.Source = "env" - } + // Determine if values came from env vars by comparing with startup config + // If current values match startup values, they came from env vars (or defaults) + // If current values differ from startup, file changed them (so not from env var) + envWatchdogIdle := appConfig.WatchDogIdle == startupConfig.WatchDogIdle + envWatchdogBusy := appConfig.WatchDogBusy == startupConfig.WatchDogBusy + envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupConfig.WatchDogIdleTimeout + envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupConfig.WatchDogBusyTimeout + envSingleBackend := appConfig.SingleBackend == startupConfig.SingleBackend + envParallelRequests := appConfig.ParallelBackendRequests == startupConfig.ParallelBackendRequests + + // Determine source: if any setting matches startup config, 
it's from env (or default) + // If any setting differs from startup, it's from file + settings.WatchdogIdleEnabled = appConfig.WatchDogIdle + settings.WatchdogBusyEnabled = appConfig.WatchDogBusy + settings.WatchdogEnabled = appConfig.WatchDog + settings.SingleBackend = appConfig.SingleBackend + settings.ParallelBackendRequests = appConfig.ParallelBackendRequests + + if appConfig.WatchDogIdleTimeout > 0 { + settings.WatchdogIdleTimeout = appConfig.WatchDogIdleTimeout.String() } else { - if appConfig.WatchDogBusyTimeout > 0 { - settings.WatchdogBusyTimeout = appConfig.WatchDogBusyTimeout.String() - } else { - settings.WatchdogBusyTimeout = "5m" // default - } + settings.WatchdogIdleTimeout = "15m" // default } - if envSingleBackendSet { - settings.SingleBackend = envSingleBackend - if settings.Source == "file" { - settings.Source = "env" - } + if appConfig.WatchDogBusyTimeout > 0 { + settings.WatchdogBusyTimeout = appConfig.WatchDogBusyTimeout.String() } else { - settings.SingleBackend = appConfig.SingleBackend + settings.WatchdogBusyTimeout = "5m" // default } - if envParallelRequestsSet { - settings.ParallelBackendRequests = envParallelRequests - if settings.Source == "file" { + // Determine overall source: if all settings match startup, it's "env" or "default" + // If any setting differs, it's "file" + if envWatchdogIdle && envWatchdogBusy && envWatchdogIdleTimeout && envWatchdogBusyTimeout && envSingleBackend && envParallelRequests { + // All match startup - check if they're at defaults + if !appConfig.WatchDog && !appConfig.SingleBackend && !appConfig.ParallelBackendRequests && appConfig.WatchDogIdleTimeout == 0 && appConfig.WatchDogBusyTimeout == 0 { + settings.Source = "default" + } else { settings.Source = "env" } } else { - settings.ParallelBackendRequests = appConfig.ParallelBackendRequests - } - - // If no env vars set and no file values, use defaults - if settings.Source == "file" && !appConfig.WatchDog && !appConfig.SingleBackend && 
!appConfig.ParallelBackendRequests { - settings.Source = "default" + settings.Source = "file" } return c.JSON(http.StatusOK, settings) @@ -146,16 +104,24 @@ func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { return func(c echo.Context) error { appConfig := app.ApplicationConfig() + startupConfig := app.StartupConfig() - // Check if env vars are set - if so, reject the update - envWatchdogIdleSet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_IDLE", "WATCHDOG_IDLE") != "" - envWatchdogBusySet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_BUSY", "WATCHDOG_BUSY") != "" - envWatchdogIdleTimeoutSet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_IDLE_TIMEOUT", "WATCHDOG_IDLE_TIMEOUT") != "" - envWatchdogBusyTimeoutSet := getEnvVarWithPrecedence("LOCALAI_WATCHDOG_BUSY_TIMEOUT", "WATCHDOG_BUSY_TIMEOUT") != "" - envSingleBackendSet := getEnvVarWithPrecedence("LOCALAI_SINGLE_ACTIVE_BACKEND", "SINGLE_ACTIVE_BACKEND") != "" - envParallelRequestsSet := getEnvVarWithPrecedence("LOCALAI_PARALLEL_REQUESTS", "PARALLEL_REQUESTS") != "" + if startupConfig == nil { + // Fallback if startup config not available + startupConfig = appConfig + } + + // Check if env vars are set by comparing with startup config + // If current values match startup values, they came from env vars (or defaults) + // If current values differ from startup, file changed them (so not from env var) + envWatchdogIdle := appConfig.WatchDogIdle == startupConfig.WatchDogIdle + envWatchdogBusy := appConfig.WatchDogBusy == startupConfig.WatchDogBusy + envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupConfig.WatchDogIdleTimeout + envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupConfig.WatchDogBusyTimeout + envSingleBackend := appConfig.SingleBackend == startupConfig.SingleBackend + envParallelRequests := appConfig.ParallelBackendRequests == startupConfig.ParallelBackendRequests - if 
envWatchdogIdleSet || envWatchdogBusySet || envWatchdogIdleTimeoutSet || envWatchdogBusyTimeoutSet || envSingleBackendSet || envParallelRequestsSet { + if envWatchdogIdle || envWatchdogBusy || envWatchdogIdleTimeout || envWatchdogBusyTimeout || envSingleBackend || envParallelRequests { return c.JSON(http.StatusBadRequest, SettingsResponse{ Success: false, Error: "Cannot update settings: environment variables are set and take precedence. Please unset environment variables first.", @@ -276,4 +242,3 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { }) } } - From d938bbb9872dec734d11477124646abd3c7fcff2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 18:39:43 +0100 Subject: [PATCH 3/9] Some refactor, move other settings to runtime (p2p) Signed-off-by: Ettore Di Giacinto --- core/application/application.go | 63 +---- core/application/config_file_watcher.go | 101 ++++++-- core/application/p2p.go | 240 +++++++++++++++++++ core/application/watchdog.go | 88 +++++++ core/cli/api/p2p.go | 87 ------- core/cli/run.go | 18 +- core/config/application_config.go | 13 ++ core/http/endpoints/localai/mcp.go | 7 +- core/http/endpoints/localai/settings.go | 236 +++++++++++++------ core/http/endpoints/openai/mcp.go | 8 +- core/http/views/settings.html | 297 +++++++++++++++++++++++- core/p2p/sync.go | 102 -------- pkg/model/watchdog.go | 1 + 13 files changed, 909 insertions(+), 352 deletions(-) create mode 100644 core/application/p2p.go create mode 100644 core/application/watchdog.go delete mode 100644 core/cli/api/p2p.go delete mode 100644 core/p2p/sync.go diff --git a/core/application/application.go b/core/application/application.go index 601cfb89cef0..24c53fcbae65 100644 --- a/core/application/application.go +++ b/core/application/application.go @@ -1,14 +1,13 @@ package application import ( + "context" "sync" - "time" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/services" 
"github.com/mudler/LocalAI/core/templates" "github.com/mudler/LocalAI/pkg/model" - "github.com/rs/zerolog/log" ) type Application struct { @@ -20,6 +19,9 @@ type Application struct { galleryService *services.GalleryService watchdogMutex sync.Mutex watchdogStop chan bool + p2pMutex sync.Mutex + p2pCtx context.Context + p2pCancel context.CancelFunc } func newApplication(appConfig *config.ApplicationConfig) *Application { @@ -56,63 +58,6 @@ func (a *Application) StartupConfig() *config.ApplicationConfig { return a.startupConfig } -// RestartWatchdog restarts the watchdog with current ApplicationConfig settings -func (a *Application) RestartWatchdog() error { - a.watchdogMutex.Lock() - defer a.watchdogMutex.Unlock() - - appConfig := a.ApplicationConfig() - - // Shutdown existing watchdog if running - if a.watchdogStop != nil { - close(a.watchdogStop) - a.watchdogStop = nil - } - - // Shutdown existing watchdog if running - currentWD := a.modelLoader.GetWatchDog() - if currentWD != nil { - currentWD.Shutdown() - // Wait a bit for shutdown to complete - time.Sleep(100 * time.Millisecond) - } - - // Create new watchdog if enabled - if appConfig.WatchDog { - wd := model.NewWatchDog( - a.modelLoader, - appConfig.WatchDogBusyTimeout, - appConfig.WatchDogIdleTimeout, - appConfig.WatchDogBusy, - appConfig.WatchDogIdle) - a.modelLoader.SetWatchDog(wd) - - // Create new stop channel - a.watchdogStop = make(chan bool, 1) - - // Start watchdog goroutine - go wd.Run() - - // Setup shutdown handler - go func() { - select { - case <-a.watchdogStop: - log.Debug().Msg("Watchdog stop signal received") - wd.Shutdown() - case <-appConfig.Context.Done(): - log.Debug().Msg("Context canceled, shutting down watchdog") - wd.Shutdown() - } - }() - - log.Info().Msg("Watchdog restarted with new settings") - } else { - log.Info().Msg("Watchdog disabled") - } - - return nil -} - func (a *Application) start() error { galleryService := services.NewGalleryService(a.ApplicationConfig(), 
a.ModelLoader()) err := galleryService.Start(a.ApplicationConfig().Context, a.ModelConfigLoader(), a.ApplicationConfig().SystemState) diff --git a/core/application/config_file_watcher.go b/core/application/config_file_watcher.go index ef922355eacf..bcbe2eb51ccd 100644 --- a/core/application/config_file_watcher.go +++ b/core/application/config_file_watcher.go @@ -184,13 +184,27 @@ func readExternalBackendsJson(startupAppConfig config.ApplicationConfig) fileHan } type runtimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + SingleBackend *bool `json:"single_backend,omitempty"` + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` + Threads *int `json:"threads,omitempty"` + ContextSize *int `json:"context_size,omitempty"` + F16 *bool `json:"f16,omitempty"` + Debug *bool `json:"debug,omitempty"` + CORS *bool `json:"cors,omitempty"` + CSRF *bool `json:"csrf,omitempty"` + CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` + P2PToken *string `json:"p2p_token,omitempty"` + P2PNetworkID *string `json:"p2p_network_id,omitempty"` + Federated *bool `json:"federated,omitempty"` + Galleries *[]config.Gallery `json:"galleries,omitempty"` + BackendGalleries *[]config.Gallery 
`json:"backend_galleries,omitempty"` + AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` + AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` } func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler { @@ -199,15 +213,26 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand // Determine if settings came from env vars by comparing with startup config // startupAppConfig contains the original values set from env vars at startup. - // If current values match startup values and are non-default, they came from env vars. - // We apply file settings only if current values match startup defaults (false/0), - // which suggests they weren't set from env vars. - envWatchdogIdle := appConfig.WatchDogIdle == startupAppConfig.WatchDogIdle && startupAppConfig.WatchDogIdle - envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy && startupAppConfig.WatchDogBusy - envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout && startupAppConfig.WatchDogIdleTimeout > 0 - envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout && startupAppConfig.WatchDogBusyTimeout > 0 - envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend && startupAppConfig.SingleBackend - envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests && startupAppConfig.ParallelBackendRequests + // If current values match startup values, they came from env vars (or defaults). + // We apply file settings only if current values match startup values (meaning not from env vars). 
+ envWatchdogIdle := appConfig.WatchDogIdle == startupAppConfig.WatchDogIdle + envWatchdogBusy := appConfig.WatchDogBusy == startupAppConfig.WatchDogBusy + envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupAppConfig.WatchDogIdleTimeout + envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupAppConfig.WatchDogBusyTimeout + envSingleBackend := appConfig.SingleBackend == startupAppConfig.SingleBackend + envParallelRequests := appConfig.ParallelBackendRequests == startupAppConfig.ParallelBackendRequests + envThreads := appConfig.Threads == startupAppConfig.Threads + envContextSize := appConfig.ContextSize == startupAppConfig.ContextSize + envF16 := appConfig.F16 == startupAppConfig.F16 + envDebug := appConfig.Debug == startupAppConfig.Debug + envCORS := appConfig.CORS == startupAppConfig.CORS + envCSRF := appConfig.CSRF == startupAppConfig.CSRF + envCORSAllowOrigins := appConfig.CORSAllowOrigins == startupAppConfig.CORSAllowOrigins + envP2PToken := appConfig.P2PToken == startupAppConfig.P2PToken + envP2PNetworkID := appConfig.P2PNetworkID == startupAppConfig.P2PNetworkID + envFederated := appConfig.Federated == startupAppConfig.Federated + envAutoloadGalleries := appConfig.AutoloadGalleries == startupAppConfig.AutoloadGalleries + envAutoloadBackendGalleries := appConfig.AutoloadBackendGalleries == startupAppConfig.AutoloadBackendGalleries if len(fileContent) > 0 { var settings runtimeSettings @@ -216,7 +241,7 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand return err } - // Apply file settings only if they don't match startup non-default values (i.e., not from env vars) + // Apply file settings only if they don't match startup values (i.e., not from env vars) if settings.WatchdogIdleEnabled != nil && !envWatchdogIdle { appConfig.WatchDogIdle = *settings.WatchdogIdleEnabled if appConfig.WatchDogIdle { @@ -251,6 +276,48 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand if 
settings.ParallelBackendRequests != nil && !envParallelRequests { appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests } + if settings.Threads != nil && !envThreads { + appConfig.Threads = *settings.Threads + } + if settings.ContextSize != nil && !envContextSize { + appConfig.ContextSize = *settings.ContextSize + } + if settings.F16 != nil && !envF16 { + appConfig.F16 = *settings.F16 + } + if settings.Debug != nil && !envDebug { + appConfig.Debug = *settings.Debug + } + if settings.CORS != nil && !envCORS { + appConfig.CORS = *settings.CORS + } + if settings.CSRF != nil && !envCSRF { + appConfig.CSRF = *settings.CSRF + } + if settings.CORSAllowOrigins != nil && !envCORSAllowOrigins { + appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins + } + if settings.P2PToken != nil && !envP2PToken { + appConfig.P2PToken = *settings.P2PToken + } + if settings.P2PNetworkID != nil && !envP2PNetworkID { + appConfig.P2PNetworkID = *settings.P2PNetworkID + } + if settings.Federated != nil && !envFederated { + appConfig.Federated = *settings.Federated + } + if settings.Galleries != nil { + appConfig.Galleries = *settings.Galleries + } + if settings.BackendGalleries != nil { + appConfig.BackendGalleries = *settings.BackendGalleries + } + if settings.AutoloadGalleries != nil && !envAutoloadGalleries { + appConfig.AutoloadGalleries = *settings.AutoloadGalleries + } + if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries { + appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries + } // If watchdog is enabled via file but not via env, ensure WatchDog flag is set if !envWatchdogIdle && !envWatchdogBusy { diff --git a/core/application/p2p.go b/core/application/p2p.go new file mode 100644 index 000000000000..87618e913a97 --- /dev/null +++ b/core/application/p2p.go @@ -0,0 +1,240 @@ +package application + +import ( + "context" + "fmt" + "net" + "slices" + "time" + + "github.com/google/uuid" + "github.com/mudler/LocalAI/core/gallery" 
+ "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/LocalAI/core/schema" + "github.com/mudler/LocalAI/core/services" + + "github.com/mudler/edgevpn/pkg/node" + "github.com/rs/zerolog/log" + zlog "github.com/rs/zerolog/log" +) + +func (a *Application) StopP2P() error { + if a.p2pCancel != nil { + a.p2pCancel() + a.p2pCancel = nil + a.p2pCtx = nil + // Wait a bit for shutdown to complete + time.Sleep(200 * time.Millisecond) + } + return nil +} + +func (a *Application) StartP2P() error { + // we need a p2p token + if a.applicationConfig.P2PToken == "" { + return fmt.Errorf("P2P token is not set") + } + + networkID := a.applicationConfig.P2PNetworkID + + ctx, cancel := context.WithCancel(a.ApplicationConfig().Context) + a.p2pCtx = ctx + a.p2pCancel = cancel + + var n *node.Node + // Here we are avoiding creating multiple nodes: + // - if the federated mode is enabled, we create a federated node and expose a service + // - exposing a service creates a node with specific options, and we don't want to create another node + + // If the federated mode is enabled, we expose a service to the local instance running + // at r.Address + if a.applicationConfig.Federated { + _, port, err := net.SplitHostPort(a.applicationConfig.APIAddress) + if err != nil { + return err + } + + // Here a new node is created and started + // and a service is exposed by the node + node, err := p2p.ExposeService(ctx, "localhost", port, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID)) + if err != nil { + return err + } + + if err := p2p.ServiceDiscoverer(ctx, node, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil { + return err + } + + n = node + // start node sync in the background + if err := a.p2pSync(ctx, node); err != nil { + return err + } + } + + // If a node wasn't created previously, create it + if n == nil { + node, err := p2p.NewNode(a.applicationConfig.P2PToken) + if err != nil { + return err + } + err = 
node.Start(ctx) + if err != nil { + return fmt.Errorf("starting new node: %w", err) + } + n = node + } + + // Attach a ServiceDiscoverer to the p2p node + log.Info().Msg("Starting P2P server discovery...") + if err := p2p.ServiceDiscoverer(ctx, n, a.applicationConfig.P2PToken, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) { + var tunnelAddresses []string + for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) { + if v.IsOnline() { + tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) + } else { + log.Info().Msgf("Node %s is offline", v.ID) + } + } + if a.applicationConfig.TunnelCallback != nil { + a.applicationConfig.TunnelCallback(tunnelAddresses) + } + }, true); err != nil { + return err + } + + return nil +} + +// RestartP2P restarts the P2P stack with current ApplicationConfig settings +// Note: the running stack is cancelled while holding p2pMutex and the new one is +// started in a goroutine, so start-up failures are logged rather than returned +func (a *Application) RestartP2P() error { + a.p2pMutex.Lock() + defer a.p2pMutex.Unlock() + + // Stop existing P2P if running + if a.p2pCancel != nil { + a.p2pCancel() + a.p2pCancel = nil + a.p2pCtx = nil + // Wait a bit for shutdown to complete + time.Sleep(200 * time.Millisecond) + } + + appConfig := a.ApplicationConfig() + + // Start P2P if token is set + if appConfig.P2PToken == "" { + return fmt.Errorf("P2P token is not set") + } + + // Create new context for P2P + ctx, cancel := context.WithCancel(appConfig.Context) + a.p2pCtx = ctx + a.p2pCancel = cancel + + // Get API address from config + address := appConfig.APIAddress + if address == "" { + address = "127.0.0.1:8080" // default + } + + // Start P2P stack in a goroutine + go func() { + if err := a.StartP2P(); err != nil { + log.Error().Err(err).Msg("Failed to start P2P stack") + cancel() // Cancel context on error + } + }() + log.Info().Msg("P2P stack restarted with new settings") + + return nil +} + +func 
syncState(ctx context.Context, n *node.Node, app *Application) error { + zlog.Debug().Msg("[p2p-sync] Syncing state") + + whatWeHave := []string{} + for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() { + whatWeHave = append(whatWeHave, model.Name) + } + + ledger, _ := n.Ledger() + currentData := ledger.CurrentData() + zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData) + data, exists := ledger.GetKey("shared_state", "models") + if !exists { + ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave) + zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave) + } + + models := []string{} + if err := data.Unmarshal(&models); err != nil { + zlog.Warn().Err(err).Msg("error unmarshalling models") + return nil + } + + zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models) + + // Sync with our state + whatIsNotThere := []string{} + for _, model := range whatWeHave { + if !slices.Contains(models, model) { + whatIsNotThere = append(whatIsNotThere, model) + } + } + if len(whatIsNotThere) > 0 { + zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...)) + ledger.AnnounceUpdate( + ctx, + 1*time.Minute, + "shared_state", + "models", + append(models, whatIsNotThere...), + ) + } + + // Check if we have a model that is not in our state, otherwise install it + for _, model := range models { + if slices.Contains(whatWeHave, model) { + zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model) + continue + } + + // we install model + zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model) + + uuid, err := uuid.NewUUID() + if err != nil { + zlog.Error().Err(err).Msg("error generating UUID") + continue + } + + app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ + ID: 
uuid.String(), + GalleryElementName: model, + Galleries: app.ApplicationConfig().Galleries, + BackendGalleries: app.ApplicationConfig().BackendGalleries, + } + } + + return nil +} + +func (a *Application) p2pSync(ctx context.Context, n *node.Node) error { + go func() { + for { + select { + case <-ctx.Done(): + return + case <-time.After(1 * time.Minute): + if err := syncState(ctx, n, a); err != nil { + zlog.Error().Err(err).Msg("error syncing state") + } + } + + } + }() + return nil +} diff --git a/core/application/watchdog.go b/core/application/watchdog.go new file mode 100644 index 000000000000..20acf0b7a491 --- /dev/null +++ b/core/application/watchdog.go @@ -0,0 +1,88 @@ +package application + +import ( + "time" + + "github.com/mudler/LocalAI/pkg/model" + "github.com/rs/zerolog/log" +) + +func (a *Application) StopWatchdog() error { + if a.watchdogStop != nil { + close(a.watchdogStop) + a.watchdogStop = nil + } + return nil +} + +// startWatchdog starts the watchdog with current ApplicationConfig settings +// This is an internal method that assumes the caller holds the watchdogMutex +func (a *Application) startWatchdog() error { + appConfig := a.ApplicationConfig() + + // Create new watchdog if enabled + if appConfig.WatchDog { + wd := model.NewWatchDog( + a.modelLoader, + appConfig.WatchDogBusyTimeout, + appConfig.WatchDogIdleTimeout, + appConfig.WatchDogBusy, + appConfig.WatchDogIdle) + a.modelLoader.SetWatchDog(wd) + + // Create new stop channel + a.watchdogStop = make(chan bool, 1) + + // Start watchdog goroutine + go wd.Run() + + // Setup shutdown handler + go func() { + select { + case <-a.watchdogStop: + log.Debug().Msg("Watchdog stop signal received") + wd.Shutdown() + case <-appConfig.Context.Done(): + log.Debug().Msg("Context canceled, shutting down watchdog") + wd.Shutdown() + } + }() + + log.Info().Msg("Watchdog started with new settings") + } else { + log.Info().Msg("Watchdog disabled") + } + + return nil +} + +// StartWatchdog starts the watchdog 
with current ApplicationConfig settings +func (a *Application) StartWatchdog() error { + a.watchdogMutex.Lock() + defer a.watchdogMutex.Unlock() + + return a.startWatchdog() +} + +// RestartWatchdog restarts the watchdog with current ApplicationConfig settings +func (a *Application) RestartWatchdog() error { + a.watchdogMutex.Lock() + defer a.watchdogMutex.Unlock() + + // Shutdown existing watchdog if running + if a.watchdogStop != nil { + close(a.watchdogStop) + a.watchdogStop = nil + } + + // Shutdown existing watchdog if running + currentWD := a.modelLoader.GetWatchDog() + if currentWD != nil { + currentWD.Shutdown() + // Wait a bit for shutdown to complete + time.Sleep(100 * time.Millisecond) + } + + // Start watchdog with new settings + return a.startWatchdog() +} diff --git a/core/cli/api/p2p.go b/core/cli/api/p2p.go deleted file mode 100644 index 9e94e94d6eb3..000000000000 --- a/core/cli/api/p2p.go +++ /dev/null @@ -1,87 +0,0 @@ -package cli_api - -import ( - "context" - "fmt" - "net" - "os" - "strings" - - "github.com/mudler/LocalAI/core/application" - "github.com/mudler/LocalAI/core/p2p" - "github.com/mudler/LocalAI/core/schema" - "github.com/mudler/edgevpn/pkg/node" - - "github.com/rs/zerolog/log" -) - -func StartP2PStack(ctx context.Context, address, token, networkID string, federated bool, app *application.Application) error { - var n *node.Node - // Here we are avoiding creating multiple nodes: - // - if the federated mode is enabled, we create a federated node and expose a service - // - exposing a service creates a node with specific options, and we don't want to create another node - - // If the federated mode is enabled, we expose a service to the local instance running - // at r.Address - if federated { - _, port, err := net.SplitHostPort(address) - if err != nil { - return err - } - - // Here a new node is created and started - // and a service is exposed by the node - node, err := p2p.ExposeService(ctx, "localhost", port, token, 
p2p.NetworkID(networkID, p2p.FederatedID)) - if err != nil { - return err - } - - if err := p2p.ServiceDiscoverer(ctx, node, token, p2p.NetworkID(networkID, p2p.FederatedID), nil, false); err != nil { - return err - } - - n = node - - // start node sync in the background - if err := p2p.Sync(ctx, node, app); err != nil { - return err - } - } - - // If the p2p mode is enabled, we start the service discovery - if token != "" { - // If a node wasn't created previously, create it - if n == nil { - node, err := p2p.NewNode(token) - if err != nil { - return err - } - err = node.Start(ctx) - if err != nil { - return fmt.Errorf("starting new node: %w", err) - } - n = node - } - - // Attach a ServiceDiscoverer to the p2p node - log.Info().Msg("Starting P2P server discovery...") - if err := p2p.ServiceDiscoverer(ctx, n, token, p2p.NetworkID(networkID, p2p.WorkerID), func(serviceID string, node schema.NodeData) { - var tunnelAddresses []string - for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(networkID, p2p.WorkerID)) { - if v.IsOnline() { - tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) - } else { - log.Info().Msgf("Node %s is offline", v.ID) - } - } - tunnelEnvVar := strings.Join(tunnelAddresses, ",") - - os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar) - log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar) - }, true); err != nil { - return err - } - } - - return nil -} diff --git a/core/cli/run.go b/core/cli/run.go index 9c21eaa4b08f..b3292620c65b 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -8,7 +8,6 @@ import ( "time" "github.com/mudler/LocalAI/core/application" - cli_api "github.com/mudler/LocalAI/core/cli/api" cliContext "github.com/mudler/LocalAI/core/cli/context" "github.com/mudler/LocalAI/core/config" "github.com/mudler/LocalAI/core/http" @@ -98,6 +97,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } opts := []config.AppOption{ + config.WithContext(context.Background()), 
config.WithConfigFile(r.ModelsConfigFile), config.WithJSONStringPreload(r.PreloadModels), config.WithYAMLConfigPreload(r.PreloadModelsConfig), @@ -128,6 +128,12 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithLoadToMemory(r.LoadToMemory), config.WithMachineTag(r.MachineTag), config.WithAPIAddress(r.Address), + config.WithTunnelCallback(func(tunnels []string) { + tunnelEnvVar := strings.Join(tunnels, ",") + // TODO: this is very specific to llama.cpp, we should have a more generic way to set the environment variable + os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar) + log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar) + }), } if r.DisableMetricsEndpoint { @@ -152,7 +158,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.WithP2PToken(token)) } - backgroundCtx := context.Background() + if r.Federated { + opts = append(opts, config.EnableFederated) + } idleWatchDog := r.EnableWatchdogIdle busyWatchDog := r.EnableWatchdogBusy @@ -222,8 +230,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { return err } - if err := cli_api.StartP2PStack(backgroundCtx, r.Address, token, r.Peer2PeerNetworkID, r.Federated, app); err != nil { - return err + if token != "" { + if err := app.StartP2P(); err != nil { + return err + } } signals.RegisterGracefulTerminationHandler(func() { diff --git a/core/config/application_config.go b/core/config/application_config.go index d98c8ba4069b..1a5bc0be6f82 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -33,6 +33,7 @@ type ApplicationConfig struct { ApiKeys []string P2PToken string P2PNetworkID string + Federated bool DisableWebUI bool EnforcePredownloadScans bool @@ -65,6 +66,8 @@ type ApplicationConfig struct { MachineTag string APIAddress string + + TunnelCallback func(tunnels []string) } type AppOption func(*ApplicationConfig) @@ -180,6 +183,10 @@ var EnableBackendGalleriesAutoload = func(o *ApplicationConfig) { 
o.AutoloadBackendGalleries = true } +var EnableFederated = func(o *ApplicationConfig) { + o.Federated = true +} + func WithExternalBackend(name string, uri string) AppOption { return func(o *ApplicationConfig) { if o.ExternalGRPCBackends == nil { @@ -273,6 +280,12 @@ func WithContextSize(ctxSize int) AppOption { } } +func WithTunnelCallback(callback func(tunnels []string)) AppOption { + return func(o *ApplicationConfig) { + o.TunnelCallback = callback + } +} + func WithF16(f16 bool) AppOption { return func(o *ApplicationConfig) { o.F16 = f16 diff --git a/core/http/endpoints/localai/mcp.go b/core/http/endpoints/localai/mcp.go index c13a388064f1..f23b00e4fabb 100644 --- a/core/http/endpoints/localai/mcp.go +++ b/core/http/endpoints/localai/mcp.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" "fmt" - "strings" + "net" "time" "github.com/labstack/echo/v4" @@ -105,7 +105,10 @@ func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eval fragment = fragment.AddMessage(message.Role, message.StringContent) } - port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:] + _, port, err := net.SplitHostPort(appConfig.APIAddress) + if err != nil { + return err + } apiKey := "" if len(appConfig.ApiKeys) > 0 { apiKey = appConfig.ApiKeys[0] diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go index fe0717cefd7c..508c3b1d2374 100644 --- a/core/http/endpoints/localai/settings.go +++ b/core/http/endpoints/localai/settings.go @@ -10,6 +10,8 @@ import ( "github.com/labstack/echo/v4" "github.com/mudler/LocalAI/core/application" + "github.com/mudler/LocalAI/core/config" + "github.com/mudler/LocalAI/core/p2p" "github.com/rs/zerolog/log" ) @@ -20,24 +22,27 @@ type SettingsResponse struct { } type RuntimeSettings struct { - WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` - WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` - WatchdogBusyEnabled *bool 
`json:"watchdog_busy_enabled,omitempty"` - WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` - WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` - SingleBackend *bool `json:"single_backend,omitempty"` - ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` -} - -type CurrentSettings struct { - WatchdogEnabled bool `json:"watchdog_enabled"` - WatchdogIdleEnabled bool `json:"watchdog_idle_enabled"` - WatchdogBusyEnabled bool `json:"watchdog_busy_enabled"` - WatchdogIdleTimeout string `json:"watchdog_idle_timeout"` - WatchdogBusyTimeout string `json:"watchdog_busy_timeout"` - SingleBackend bool `json:"single_backend"` - ParallelBackendRequests bool `json:"parallel_backend_requests"` - Source string `json:"source"` // "env", "file", or "default" + WatchdogEnabled *bool `json:"watchdog_enabled,omitempty"` + WatchdogIdleEnabled *bool `json:"watchdog_idle_enabled,omitempty"` + WatchdogBusyEnabled *bool `json:"watchdog_busy_enabled,omitempty"` + WatchdogIdleTimeout *string `json:"watchdog_idle_timeout,omitempty"` + WatchdogBusyTimeout *string `json:"watchdog_busy_timeout,omitempty"` + SingleBackend *bool `json:"single_backend,omitempty"` + ParallelBackendRequests *bool `json:"parallel_backend_requests,omitempty"` + Threads *int `json:"threads,omitempty"` + ContextSize *int `json:"context_size,omitempty"` + F16 *bool `json:"f16,omitempty"` + Debug *bool `json:"debug,omitempty"` + CORS *bool `json:"cors,omitempty"` + CSRF *bool `json:"csrf,omitempty"` + CORSAllowOrigins *string `json:"cors_allow_origins,omitempty"` + P2PToken *string `json:"p2p_token,omitempty"` + P2PNetworkID *string `json:"p2p_network_id,omitempty"` + Federated *bool `json:"federated,omitempty"` + Galleries *[]config.Gallery `json:"galleries,omitempty"` + BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` + AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` + AutoloadBackendGalleries *bool 
`json:"autoload_backend_galleries,omitempty"` } // GetSettingsEndpoint returns current settings with precedence (env > file > defaults) @@ -51,50 +56,62 @@ func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { startupConfig = appConfig } - settings := CurrentSettings{} + settings := RuntimeSettings{} - // Determine if values came from env vars by comparing with startup config - // If current values match startup values, they came from env vars (or defaults) - // If current values differ from startup, file changed them (so not from env var) - envWatchdogIdle := appConfig.WatchDogIdle == startupConfig.WatchDogIdle - envWatchdogBusy := appConfig.WatchDogBusy == startupConfig.WatchDogBusy - envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupConfig.WatchDogIdleTimeout - envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupConfig.WatchDogBusyTimeout - envSingleBackend := appConfig.SingleBackend == startupConfig.SingleBackend - envParallelRequests := appConfig.ParallelBackendRequests == startupConfig.ParallelBackendRequests + // Set all current values (using pointers for RuntimeSettings) + watchdogIdle := appConfig.WatchDogIdle + watchdogBusy := appConfig.WatchDogBusy + watchdogEnabled := appConfig.WatchDog + singleBackend := appConfig.SingleBackend + parallelBackendRequests := appConfig.ParallelBackendRequests + threads := appConfig.Threads + contextSize := appConfig.ContextSize + f16 := appConfig.F16 + debug := appConfig.Debug + cors := appConfig.CORS + csrf := appConfig.CSRF + corsAllowOrigins := appConfig.CORSAllowOrigins + p2pToken := appConfig.P2PToken + p2pNetworkID := appConfig.P2PNetworkID + federated := appConfig.Federated + galleries := appConfig.Galleries + backendGalleries := appConfig.BackendGalleries + autoloadGalleries := appConfig.AutoloadGalleries + autoloadBackendGalleries := appConfig.AutoloadBackendGalleries - // Determine source: if any setting matches startup config, it's from env (or default) - // If 
any setting differs from startup, it's from file - settings.WatchdogIdleEnabled = appConfig.WatchDogIdle - settings.WatchdogBusyEnabled = appConfig.WatchDogBusy - settings.WatchdogEnabled = appConfig.WatchDog - settings.SingleBackend = appConfig.SingleBackend - settings.ParallelBackendRequests = appConfig.ParallelBackendRequests + settings.WatchdogIdleEnabled = &watchdogIdle + settings.WatchdogBusyEnabled = &watchdogBusy + settings.WatchdogEnabled = &watchdogEnabled + settings.SingleBackend = &singleBackend + settings.ParallelBackendRequests = &parallelBackendRequests + settings.Threads = &threads + settings.ContextSize = &contextSize + settings.F16 = &f16 + settings.Debug = &debug + settings.CORS = &cors + settings.CSRF = &csrf + settings.CORSAllowOrigins = &corsAllowOrigins + settings.P2PToken = &p2pToken + settings.P2PNetworkID = &p2pNetworkID + settings.Federated = &federated + settings.Galleries = &galleries + settings.BackendGalleries = &backendGalleries + settings.AutoloadGalleries = &autoloadGalleries + settings.AutoloadBackendGalleries = &autoloadBackendGalleries + var idleTimeout, busyTimeout string if appConfig.WatchDogIdleTimeout > 0 { - settings.WatchdogIdleTimeout = appConfig.WatchDogIdleTimeout.String() + idleTimeout = appConfig.WatchDogIdleTimeout.String() } else { - settings.WatchdogIdleTimeout = "15m" // default + idleTimeout = "15m" // default } - if appConfig.WatchDogBusyTimeout > 0 { - settings.WatchdogBusyTimeout = appConfig.WatchDogBusyTimeout.String() + busyTimeout = appConfig.WatchDogBusyTimeout.String() } else { - settings.WatchdogBusyTimeout = "5m" // default - } - - // Determine overall source: if all settings match startup, it's "env" or "default" - // If any setting differs, it's "file" - if envWatchdogIdle && envWatchdogBusy && envWatchdogIdleTimeout && envWatchdogBusyTimeout && envSingleBackend && envParallelRequests { - // All match startup - check if they're at defaults - if !appConfig.WatchDog && !appConfig.SingleBackend && 
!appConfig.ParallelBackendRequests && appConfig.WatchDogIdleTimeout == 0 && appConfig.WatchDogBusyTimeout == 0 { - settings.Source = "default" - } else { - settings.Source = "env" - } - } else { - settings.Source = "file" + busyTimeout = "5m" // default } + settings.WatchdogIdleTimeout = &idleTimeout + settings.WatchdogBusyTimeout = &busyTimeout return c.JSON(http.StatusOK, settings) } @@ -111,23 +128,6 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { startupConfig = appConfig } - // Check if env vars are set by comparing with startup config - // If current values match startup values, they came from env vars (or defaults) - // If current values differ from startup, file changed them (so not from env var) - envWatchdogIdle := appConfig.WatchDogIdle == startupConfig.WatchDogIdle - envWatchdogBusy := appConfig.WatchDogBusy == startupConfig.WatchDogBusy - envWatchdogIdleTimeout := appConfig.WatchDogIdleTimeout == startupConfig.WatchDogIdleTimeout - envWatchdogBusyTimeout := appConfig.WatchDogBusyTimeout == startupConfig.WatchDogBusyTimeout - envSingleBackend := appConfig.SingleBackend == startupConfig.SingleBackend - envParallelRequests := appConfig.ParallelBackendRequests == startupConfig.ParallelBackendRequests - - if envWatchdogIdle || envWatchdogBusy || envWatchdogIdleTimeout || envWatchdogBusyTimeout || envSingleBackend || envParallelRequests { - return c.JSON(http.StatusBadRequest, SettingsResponse{ - Success: false, - Error: "Cannot update settings: environment variables are set and take precedence. 
Please unset environment variables first.", - }) - } - body, err := io.ReadAll(c.Request().Body) if err != nil { return c.JSON(http.StatusBadRequest, SettingsResponse{ @@ -188,7 +188,7 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { }) } - // Apply settings immediately + // Apply settings immediately, checking env var overrides per field watchdogChanged := false if settings.WatchdogEnabled != nil { appConfig.WatchDog = *settings.WatchdogEnabled @@ -224,15 +224,97 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { if settings.ParallelBackendRequests != nil { appConfig.ParallelBackendRequests = *settings.ParallelBackendRequests } + if settings.Threads != nil { + appConfig.Threads = *settings.Threads + } + if settings.ContextSize != nil { + appConfig.ContextSize = *settings.ContextSize + } + if settings.F16 != nil { + appConfig.F16 = *settings.F16 + } + if settings.Debug != nil { + appConfig.Debug = *settings.Debug + } + if settings.CORS != nil { + appConfig.CORS = *settings.CORS + } + if settings.CSRF != nil { + appConfig.CSRF = *settings.CSRF + } + if settings.CORSAllowOrigins != nil { + appConfig.CORSAllowOrigins = *settings.CORSAllowOrigins + } + if settings.P2PToken != nil { + appConfig.P2PToken = *settings.P2PToken + } + if settings.P2PNetworkID != nil { + appConfig.P2PNetworkID = *settings.P2PNetworkID + } + if settings.Federated != nil { + appConfig.Federated = *settings.Federated + } + if settings.Galleries != nil { + appConfig.Galleries = *settings.Galleries + } + if settings.BackendGalleries != nil { + appConfig.BackendGalleries = *settings.BackendGalleries + } + if settings.AutoloadGalleries != nil { + appConfig.AutoloadGalleries = *settings.AutoloadGalleries + } + if settings.AutoloadBackendGalleries != nil { + appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries + } // Restart watchdog if settings changed if watchdogChanged { - if err := app.RestartWatchdog(); err != nil { - 
log.Error().Err(err).Msg("Failed to restart watchdog") - return c.JSON(http.StatusInternalServerError, SettingsResponse{ - Success: false, - Error: "Settings saved but failed to restart watchdog: " + err.Error(), - }) + if settings.WatchdogEnabled != nil && !*settings.WatchdogEnabled || settings.WatchdogEnabled == nil { + if err := app.StopWatchdog(); err != nil { + log.Error().Err(err).Msg("Failed to stop watchdog") + return c.JSON(http.StatusInternalServerError, SettingsResponse{ + Success: false, + Error: "Settings saved but failed to stop watchdog: " + err.Error(), + }) + } + } else { + if err := app.RestartWatchdog(); err != nil { + log.Error().Err(err).Msg("Failed to restart watchdog") + return c.JSON(http.StatusInternalServerError, SettingsResponse{ + Success: false, + Error: "Settings saved but failed to restart watchdog: " + err.Error(), + }) + } + } + } + + // Restart P2P if P2P settings changed + p2pChanged := settings.P2PToken != nil || settings.P2PNetworkID != nil || settings.Federated != nil + if p2pChanged { + if settings.P2PToken != nil && *settings.P2PToken == "" { + // stop P2P + if err := app.StopP2P(); err != nil { + log.Error().Err(err).Msg("Failed to stop P2P") + return c.JSON(http.StatusInternalServerError, SettingsResponse{ + Success: false, + Error: "Settings saved but failed to stop P2P: " + err.Error(), + }) + } + } else { + if settings.P2PToken != nil && *settings.P2PToken == "0" { + // generate a new token when the user sets the token to "0" + token := p2p.GenerateToken(60, 60) + settings.P2PToken = &token + appConfig.P2PToken = token + } + // Restart P2P so the new settings take effect + if err := app.RestartP2P(); err != nil { + log.Error().Err(err).Msg("Failed to stop P2P") + return c.JSON(http.StatusInternalServerError, SettingsResponse{ + Success: false, + Error: "Settings saved but failed to stop P2P: " + err.Error(), + }) + } } } diff --git a/core/http/endpoints/openai/mcp.go b/core/http/endpoints/openai/mcp.go index 264403c31d87..9318aa6a7bd3 100644 --- 
a/core/http/endpoints/openai/mcp.go +++ b/core/http/endpoints/openai/mcp.go @@ -5,7 +5,7 @@ import ( "encoding/json" "errors" "fmt" - "strings" + "net" "time" "github.com/labstack/echo/v4" @@ -75,7 +75,11 @@ func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, fragment = fragment.AddMessage(message.Role, message.StringContent) } - port := appConfig.APIAddress[strings.LastIndex(appConfig.APIAddress, ":")+1:] + _, port, err := net.SplitHostPort(appConfig.APIAddress) + if err != nil { + return err + } + apiKey := "" if appConfig.ApiKeys != nil { apiKey = appConfig.ApiKeys[0] diff --git a/core/http/views/settings.html b/core/http/views/settings.html index 527c8b09c922..b1719a9ca5a3 100644 --- a/core/http/views/settings.html +++ b/core/http/views/settings.html @@ -166,6 +166,216 @@

+ +
+

+ + Performance Settings +

+

+ Configure default performance parameters for models +

+ +
+ +
+ +

Number of threads to use for model inference (0 = auto)

+ +
+ + +
+ +

Default context window size for models

+ +
+ + +
+
+ +

Use 16-bit floating point precision

+
+ +
+ + +
+
+ +

Enable debug logging

+
+ +
+
+
+ + +
+

+ + API Settings +

+

+ Configure CORS and CSRF protection +

+ +
+ +
+
+ +

Enable Cross-Origin Resource Sharing

+
+ +
+ + +
+ +

Comma-separated list of allowed origins

+ +
+ + +
+
+ +

Enable Cross-Site Request Forgery protection

+
+ +
+
+
+ + +
+

+ + P2P Settings +

+

+ Configure peer-to-peer networking +

+ +
+ +
+ +

Authentication token for P2P network (set to 0 to generate a new token)

+ +
+ + +
+ +

Network identifier for P2P connections

+ +
+ + +
+
+ +

Enable federated instance mode

+
+ +
+
+
+ + +
+

+ + Gallery Settings +

+

+ Configure model and backend galleries +

+ +
+ +
+
+ +

Automatically load model galleries on startup

+
+ +
+ + +
+
+ +

Automatically load backend galleries on startup

+
+ +
+ + +
+ +

Array of gallery objects with 'url' and 'name' fields

+ +
+ + +
+ +

Array of backend gallery objects with 'url' and 'name' fields

+ +
+
+
+
@@ -206,7 +416,21 @@

watchdog_idle_timeout: '15m', watchdog_busy_timeout: '5m', single_backend: false, - parallel_backend_requests: false + parallel_backend_requests: false, + threads: 0, + context_size: 512, + f16: false, + debug: false, + cors: false, + csrf: false, + cors_allow_origins: '', + p2p_token: '', + p2p_network_id: '', + federated: false, + autoload_galleries: false, + autoload_backend_galleries: false, + galleries_json: '[]', + backend_galleries_json: '[]' }, sourceInfo: '', saving: false, @@ -228,7 +452,21 @@

watchdog_idle_timeout: data.watchdog_idle_timeout || '15m', watchdog_busy_timeout: data.watchdog_busy_timeout || '5m', single_backend: data.single_backend, - parallel_backend_requests: data.parallel_backend_requests + parallel_backend_requests: data.parallel_backend_requests, + threads: data.threads || 0, + context_size: data.context_size || 512, + f16: data.f16 || false, + debug: data.debug || false, + cors: data.cors || false, + csrf: data.csrf || false, + cors_allow_origins: data.cors_allow_origins || '', + p2p_token: data.p2p_token || '', + p2p_network_id: data.p2p_network_id || '', + federated: data.federated || false, + autoload_galleries: data.autoload_galleries || false, + autoload_backend_galleries: data.autoload_backend_galleries || false, + galleries_json: JSON.stringify(data.galleries || [], null, 2), + backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2) }; this.sourceInfo = data.source || 'default'; } else { @@ -277,6 +515,61 @@

if (this.settings.parallel_backend_requests !== undefined) { payload.parallel_backend_requests = this.settings.parallel_backend_requests; } + if (this.settings.threads !== undefined) { + payload.threads = parseInt(this.settings.threads) || 0; + } + if (this.settings.context_size !== undefined) { + payload.context_size = parseInt(this.settings.context_size) || 512; + } + if (this.settings.f16 !== undefined) { + payload.f16 = this.settings.f16; + } + if (this.settings.debug !== undefined) { + payload.debug = this.settings.debug; + } + if (this.settings.cors !== undefined) { + payload.cors = this.settings.cors; + } + if (this.settings.csrf !== undefined) { + payload.csrf = this.settings.csrf; + } + if (this.settings.cors_allow_origins !== undefined) { + payload.cors_allow_origins = this.settings.cors_allow_origins; + } + if (this.settings.p2p_token !== undefined) { + payload.p2p_token = this.settings.p2p_token; + } + if (this.settings.p2p_network_id !== undefined) { + payload.p2p_network_id = this.settings.p2p_network_id; + } + if (this.settings.federated !== undefined) { + payload.federated = this.settings.federated; + } + if (this.settings.autoload_galleries !== undefined) { + payload.autoload_galleries = this.settings.autoload_galleries; + } + if (this.settings.autoload_backend_galleries !== undefined) { + payload.autoload_backend_galleries = this.settings.autoload_backend_galleries; + } + // Parse galleries JSON + if (this.settings.galleries_json) { + try { + payload.galleries = JSON.parse(this.settings.galleries_json); + } catch (e) { + this.addNotification('Invalid galleries JSON: ' + e.message, 'error'); + this.saving = false; + return; + } + } + if (this.settings.backend_galleries_json) { + try { + payload.backend_galleries = JSON.parse(this.settings.backend_galleries_json); + } catch (e) { + this.addNotification('Invalid backend galleries JSON: ' + e.message, 'error'); + this.saving = false; + return; + } + } const response = await fetch('/api/settings', { 
method: 'POST', diff --git a/core/p2p/sync.go b/core/p2p/sync.go deleted file mode 100644 index 44efe93d8224..000000000000 --- a/core/p2p/sync.go +++ /dev/null @@ -1,102 +0,0 @@ -package p2p - -import ( - "context" - "slices" - "time" - - "github.com/google/uuid" - "github.com/mudler/LocalAI/core/application" - "github.com/mudler/LocalAI/core/gallery" - "github.com/mudler/LocalAI/core/services" - - "github.com/mudler/edgevpn/pkg/node" - zlog "github.com/rs/zerolog/log" -) - -func syncState(ctx context.Context, n *node.Node, app *application.Application) error { - zlog.Debug().Msg("[p2p-sync] Syncing state") - - whatWeHave := []string{} - for _, model := range app.ModelConfigLoader().GetAllModelsConfigs() { - whatWeHave = append(whatWeHave, model.Name) - } - - ledger, _ := n.Ledger() - currentData := ledger.CurrentData() - zlog.Debug().Msgf("[p2p-sync] Current data: %v", currentData) - data, exists := ledger.GetKey("shared_state", "models") - if !exists { - ledger.AnnounceUpdate(ctx, time.Minute, "shared_state", "models", whatWeHave) - zlog.Debug().Msgf("No models found in the ledger, announced our models: %v", whatWeHave) - } - - models := []string{} - if err := data.Unmarshal(&models); err != nil { - zlog.Warn().Err(err).Msg("error unmarshalling models") - return nil - } - - zlog.Debug().Msgf("[p2p-sync] Models that are present in this instance: %v\nModels that are in the ledger: %v", whatWeHave, models) - - // Sync with our state - whatIsNotThere := []string{} - for _, model := range whatWeHave { - if !slices.Contains(models, model) { - whatIsNotThere = append(whatIsNotThere, model) - } - } - if len(whatIsNotThere) > 0 { - zlog.Debug().Msgf("[p2p-sync] Announcing our models: %v", append(models, whatIsNotThere...)) - ledger.AnnounceUpdate( - ctx, - 1*time.Minute, - "shared_state", - "models", - append(models, whatIsNotThere...), - ) - } - - // Check if we have a model that is not in our state, otherwise install it - for _, model := range models { - if 
slices.Contains(whatWeHave, model) { - zlog.Debug().Msgf("[p2p-sync] Model %s is already present in this instance", model) - continue - } - - // we install model - zlog.Info().Msgf("[p2p-sync] Installing model which is not present in this instance: %s", model) - - uuid, err := uuid.NewUUID() - if err != nil { - zlog.Error().Err(err).Msg("error generating UUID") - continue - } - - app.GalleryService().ModelGalleryChannel <- services.GalleryOp[gallery.GalleryModel, gallery.ModelConfig]{ - ID: uuid.String(), - GalleryElementName: model, - Galleries: app.ApplicationConfig().Galleries, - BackendGalleries: app.ApplicationConfig().BackendGalleries, - } - } - - return nil -} - -func Sync(ctx context.Context, n *node.Node, app *application.Application) error { - go func() { - for { - select { - case <-ctx.Done(): - return - case <-time.After(1 * time.Minute): - if err := syncState(ctx, n, app); err != nil { - zlog.Error().Err(err).Msg("error syncing state") - } - } - - } - }() - return nil -} diff --git a/pkg/model/watchdog.go b/pkg/model/watchdog.go index 483fc7304713..e279a9edc104 100644 --- a/pkg/model/watchdog.go +++ b/pkg/model/watchdog.go @@ -51,6 +51,7 @@ func NewWatchDog(pm ProcessManager, timeoutBusy, timeoutIdle time.Duration, busy func (wd *WatchDog) Shutdown() { wd.Lock() defer wd.Unlock() + log.Info().Msg("[WatchDog] Shutting down watchdog") wd.stop <- true } From cf87166717678fefaaf15ca74f5c33b60e338ad3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 19:00:20 +0100 Subject: [PATCH 4/9] Add API Keys handling Signed-off-by: Ettore Di Giacinto --- core/application/config_file_watcher.go | 10 ++++++ core/http/endpoints/localai/settings.go | 15 +++++++++ core/http/views/settings.html | 43 +++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/core/application/config_file_watcher.go b/core/application/config_file_watcher.go index bcbe2eb51ccd..0129828cac5f 100644 --- a/core/application/config_file_watcher.go 
+++ b/core/application/config_file_watcher.go @@ -205,6 +205,7 @@ type runtimeSettings struct { BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` + ApiKeys *[]string `json:"api_keys,omitempty"` } func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHandler { @@ -318,6 +319,15 @@ func readRuntimeSettingsJson(startupAppConfig config.ApplicationConfig) fileHand if settings.AutoloadBackendGalleries != nil && !envAutoloadBackendGalleries { appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries } + if settings.ApiKeys != nil { + // API keys from env vars (startup) should be kept, runtime settings keys replace all runtime keys + // If runtime_settings.json specifies ApiKeys (even if empty), it replaces all runtime keys + // Start with env keys, then add runtime_settings.json keys (which may be empty to clear them) + envKeys := startupAppConfig.ApiKeys + runtimeKeys := *settings.ApiKeys + // Cap envKeys at its length so append allocates a fresh array instead of writing runtime keys into startupAppConfig's backing array + appConfig.ApiKeys = append(envKeys[:len(envKeys):len(envKeys)], runtimeKeys...)
+ } // If watchdog is enabled via file but not via env, ensure WatchDog flag is set if !envWatchdogIdle && !envWatchdogBusy { diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go index 508c3b1d2374..5145796666c3 100644 --- a/core/http/endpoints/localai/settings.go +++ b/core/http/endpoints/localai/settings.go @@ -43,6 +43,7 @@ type RuntimeSettings struct { BackendGalleries *[]config.Gallery `json:"backend_galleries,omitempty"` AutoloadGalleries *bool `json:"autoload_galleries,omitempty"` AutoloadBackendGalleries *bool `json:"autoload_backend_galleries,omitempty"` + ApiKeys *[]string `json:"api_keys"` // No omitempty - we need to save empty arrays to clear keys } // GetSettingsEndpoint returns current settings with precedence (env > file > defaults) @@ -78,6 +79,7 @@ func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { backendGalleries := appConfig.BackendGalleries autoloadGalleries := appConfig.AutoloadGalleries autoloadBackendGalleries := appConfig.AutoloadBackendGalleries + apiKeys := appConfig.ApiKeys settings.WatchdogIdleEnabled = &watchdogIdle settings.WatchdogBusyEnabled = &watchdogBusy @@ -98,6 +100,7 @@ func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { settings.BackendGalleries = &backendGalleries settings.AutoloadGalleries = &autoloadGalleries settings.AutoloadBackendGalleries = &autoloadBackendGalleries + settings.ApiKeys = &apiKeys var idleTimeout, busyTimeout string if appConfig.WatchDogIdleTimeout > 0 { @@ -266,6 +269,18 @@ func UpdateSettingsEndpoint(app *application.Application) echo.HandlerFunc { if settings.AutoloadBackendGalleries != nil { appConfig.AutoloadBackendGalleries = *settings.AutoloadBackendGalleries } + if settings.ApiKeys != nil { + // API keys from env vars (startup) should be kept, runtime settings keys are added + // Combine startup keys (env vars) with runtime settings keys + envKeys := startupConfig.ApiKeys + runtimeKeys := *settings.ApiKeys + 
// Merge: env keys first (they take precedence), then runtime keys; envKeys is capped at its length so append copies instead of writing into the startup config's backing array + appConfig.ApiKeys = append(envKeys[:len(envKeys):len(envKeys)], runtimeKeys...) + + // Note: We only save to runtime_settings.json (not api_keys.json) to avoid duplication + // The runtime_settings.json is the unified config file. If api_keys.json exists, + // it will be loaded first, but runtime_settings.json takes precedence and deduplicates. + } // Restart watchdog if settings changed if watchdogChanged { diff --git a/core/http/views/settings.html b/core/http/views/settings.html index b1719a9ca5a3..b27c705579d9 100644 --- a/core/http/views/settings.html +++ b/core/http/views/settings.html @@ -317,6 +317,30 @@

+ +
+

+ + API Keys +

+

+ Manage API keys for authentication. Keys from environment variables are always included. +

+ +
+ +
+ +

List of API keys (one per line or comma-separated)

+ +

Note: API keys are sensitive. Handle with care.

+
+
+
+

@@ -430,7 +454,8 @@

autoload_galleries: false, autoload_backend_galleries: false, galleries_json: '[]', - backend_galleries_json: '[]' + backend_galleries_json: '[]', + api_keys_text: '' }, sourceInfo: '', saving: false, @@ -466,7 +491,8 @@

autoload_galleries: data.autoload_galleries || false, autoload_backend_galleries: data.autoload_backend_galleries || false, galleries_json: JSON.stringify(data.galleries || [], null, 2), - backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2) + backend_galleries_json: JSON.stringify(data.backend_galleries || [], null, 2), + api_keys_text: (data.api_keys || []).join('\n') }; this.sourceInfo = data.source || 'default'; } else { @@ -551,6 +577,19 @@

if (this.settings.autoload_backend_galleries !== undefined) { payload.autoload_backend_galleries = this.settings.autoload_backend_galleries; } + // Parse API keys from text (split by newline or comma, trim whitespace, filter empty) + if (this.settings.api_keys_text !== undefined) { + const keys = this.settings.api_keys_text + .split(/[\n,]/) + .map(k => k.trim()) + .filter(k => k.length > 0); + if (keys.length > 0) { + payload.api_keys = keys; + } else { + // If empty, send empty array to clear keys + payload.api_keys = []; + } + } // Parse galleries JSON if (this.settings.galleries_json) { try { From 6afc4afdcda1ffa6dc77779ba01c632e37cb241f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 19:08:49 +0100 Subject: [PATCH 5/9] Allow to disable runtime settings Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 5 +++++ core/config/application_config.go | 6 ++++++ core/http/endpoints/localai/welcome.go | 23 ++++++++++---------- core/http/routes/ui_api.go | 30 ++++++++++++++------------ core/http/views/manage.html | 2 ++ 5 files changed, 41 insertions(+), 25 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index b3292620c65b..efb0ee99b9ec 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -51,6 +51,7 @@ type RunCMD struct { UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. When this is set, all the requests must be authenticated with one of these API keys" group:"api"` DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disables the web user interface. When set to true, the server will only expose API endpoints without serving the web interface" group:"api"` + DisableRuntimeSettings bool `env:"LOCALAI_DISABLE_RUNTIME_SETTINGS,DISABLE_RUNTIME_SETTINGS" default:"false" help:"Disables the runtime settings. 
When set to true, the server will not load the runtime settings from the runtime_settings.json file" group:"api"` DisablePredownloadScan bool `env:"LOCALAI_DISABLE_PREDOWNLOAD_SCAN" help:"If true, disables the best-effort security scanner before downloading any files." group:"hardening" default:"false"` OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` UseSubtleKeyComparison bool `env:"LOCALAI_SUBTLE_KEY_COMPARISON" default:"false" help:"If true, API Key validation comparisons will be performed using constant-time comparisons rather than simple equality. This trades off performance on each request for resiliancy against timing attacks." group:"hardening"` @@ -140,6 +141,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.DisableMetricsEndpoint) } + if r.DisableRuntimeSettings { + opts = append(opts, config.DisableRuntimeSettings) + } + token := "" if r.Peer2Peer || r.Peer2PeerToken != "" { log.Info().Msg("P2P mode enabled") diff --git a/core/config/application_config.go b/core/config/application_config.go index 1a5bc0be6f82..39caa527f008 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -68,6 +68,8 @@ type ApplicationConfig struct { APIAddress string TunnelCallback func(tunnels []string) + + DisableRuntimeSettings bool } type AppOption func(*ApplicationConfig) @@ -155,6 +157,10 @@ var DisableWebUI = func(o *ApplicationConfig) { o.DisableWebUI = true } +var DisableRuntimeSettings = func(o *ApplicationConfig) { + o.DisableRuntimeSettings = true +} + func SetWatchDogBusyTimeout(t time.Duration) AppOption { return func(o *ApplicationConfig) { o.WatchDogBusyTimeout = t diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index d21d853c41f1..ce197ba05e73 100644 
--- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -43,17 +43,18 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, processingModels, taskTypes := opcache.GetStatus() summary := map[string]interface{}{ - "Title": "LocalAI API - " + internal.PrintableVersion(), - "Version": internal.PrintableVersion(), - "BaseURL": middleware.BaseURL(c), - "Models": modelsWithoutConfig, - "ModelsConfig": modelConfigs, - "GalleryConfig": galleryConfigs, - "ApplicationConfig": appConfig, - "ProcessingModels": processingModels, - "TaskTypes": taskTypes, - "LoadedModels": loadedModelsMap, - "InstalledBackends": installedBackends, + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + "BaseURL": middleware.BaseURL(c), + "Models": modelsWithoutConfig, + "ModelsConfig": modelConfigs, + "GalleryConfig": galleryConfigs, + "ApplicationConfig": appConfig, + "ProcessingModels": processingModels, + "TaskTypes": taskTypes, + "LoadedModels": loadedModelsMap, + "InstalledBackends": installedBackends, + "DisableRuntimeSettings": appConfig.DisableRuntimeSettings, } contentType := c.Request().Header.Get("Content-Type") diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go index cd56c0e5c3ee..95b20410f4b7 100644 --- a/core/http/routes/ui_api.go +++ b/core/http/routes/ui_api.go @@ -266,17 +266,17 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model installedModelsCount := len(modelConfigs) + len(modelsWithoutConfig) return c.JSON(200, map[string]interface{}{ - "models": modelsJSON, - "repositories": appConfig.Galleries, - "allTags": tags, - "processingModels": processingModelsData, - "taskTypes": taskTypes, - "availableModels": totalModels, - "installedModels": installedModelsCount, - "currentPage": pageNum, - "totalPages": totalPages, - "prevPage": prevPage, - "nextPage": nextPage, + "models": modelsJSON, + "repositories": appConfig.Galleries, + "allTags": 
tags, + "processingModels": processingModelsData, + "taskTypes": taskTypes, + "availableModels": totalModels, + "installedModels": installedModelsCount, + "currentPage": pageNum, + "totalPages": totalPages, + "prevPage": prevPage, + "nextPage": nextPage, }) }) @@ -805,7 +805,9 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model }) }) - // Settings API - app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance)) - app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance)) + if !appConfig.DisableRuntimeSettings { + // Settings API + app.GET("/api/settings", localai.GetSettingsEndpoint(applicationInstance)) + app.POST("/api/settings", localai.UpdateSettingsEndpoint(applicationInstance)) + } } diff --git a/core/http/views/manage.html b/core/http/views/manage.html index 3a460dff639e..65f5c7d62771 100644 --- a/core/http/views/manage.html +++ b/core/http/views/manage.html @@ -67,11 +67,13 @@

Backend Gallery + {{ if not .DisableRuntimeSettings }} Settings + {{ end }}

From 8dd3d2762d146a412ea1c434b21a742dfb368692 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 19:13:12 +0100 Subject: [PATCH 6/9] Documentation Signed-off-by: Ettore Di Giacinto --- docs/content/advanced/vram-management.md | 8 + docs/content/features/_index.en.md | 1 + docs/content/features/model-gallery.md | 8 +- docs/content/features/runtime-settings.md | 180 ++++++++++++++++++++++ docs/content/reference/cli-reference.md | 3 +- 5 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 docs/content/features/runtime-settings.md diff --git a/docs/content/advanced/vram-management.md b/docs/content/advanced/vram-management.md index a9f846ef9f16..557e51bec0ec 100644 --- a/docs/content/advanced/vram-management.md +++ b/docs/content/advanced/vram-management.md @@ -48,12 +48,15 @@ curl http://localhost:8080/v1/chat/completions -d '{"model": "model-b", ...}' For more flexible memory management, LocalAI provides watchdog mechanisms that automatically unload models based on their activity state. This allows multiple models to be loaded simultaneously, but automatically frees memory when models become inactive or stuck. +> **Note:** Watchdog settings can be configured via the [Runtime Settings]({{%relref "features/runtime-settings#watchdog-settings" %}}) web interface, which allows you to adjust settings without restarting the application. + ### Idle Watchdog The idle watchdog monitors models that haven't been used for a specified period and automatically unloads them to free VRAM. #### Configuration +Via environment variables or CLI: ```bash LOCALAI_WATCHDOG_IDLE=true ./local-ai @@ -62,12 +65,15 @@ LOCALAI_WATCHDOG_IDLE=true LOCALAI_WATCHDOG_IDLE_TIMEOUT=10m ./local-ai ./local-ai --enable-watchdog-idle --watchdog-idle-timeout=10m ``` +Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Idle Enabled" with your desired timeout. 
+ ### Busy Watchdog The busy watchdog monitors models that have been processing requests for an unusually long time and terminates them if they exceed a threshold. This is useful for detecting and recovering from stuck or hung backends. #### Configuration +Via environment variables or CLI: ```bash LOCALAI_WATCHDOG_BUSY=true ./local-ai @@ -76,6 +82,8 @@ LOCALAI_WATCHDOG_BUSY=true LOCALAI_WATCHDOG_BUSY_TIMEOUT=10m ./local-ai ./local-ai --enable-watchdog-busy --watchdog-busy-timeout=10m ``` +Via web UI: Navigate to Settings → Watchdog Settings and enable "Watchdog Busy Enabled" with your desired timeout. + ### Combined Configuration You can enable both watchdogs simultaneously for comprehensive memory management: diff --git a/docs/content/features/_index.en.md b/docs/content/features/_index.en.md index 98be6d04ab54..1e93d2182966 100644 --- a/docs/content/features/_index.en.md +++ b/docs/content/features/_index.en.md @@ -32,6 +32,7 @@ LocalAI provides a comprehensive set of features for running AI models locally. - **[Stores](stores/)** - Vector similarity search for embeddings - **[Model Gallery](model-gallery/)** - Browse and install pre-configured models - **[Backends](backends/)** - Learn about available backends and how to manage them +- **[Runtime Settings](runtime-settings/)** - Configure application settings via web UI without restarting ## Getting Started diff --git a/docs/content/features/model-gallery.md b/docs/content/features/model-gallery.md index 5542a107c4aa..a7665952fe2c 100644 --- a/docs/content/features/model-gallery.md +++ b/docs/content/features/model-gallery.md @@ -33,12 +33,18 @@ Navigate the WebUI interface in the "Models" section from the navbar at the top. ## Add other galleries -You can add other galleries by setting the `GALLERIES` environment variable. The `GALLERIES` environment variable is a list of JSON objects, where each object has a `name` and a `url` field. 
The `name` field is the name of the gallery, and the `url` field is the URL of the gallery's index file, for example: +You can add other galleries by: + +1. **Using the Web UI**: Navigate to the [Runtime Settings]({{%relref "features/runtime-settings#gallery-settings" %}}) page and configure galleries through the interface. + +2. **Using Environment Variables**: Set the `GALLERIES` environment variable. The `GALLERIES` environment variable is a list of JSON objects, where each object has a `name` and a `url` field. The `name` field is the name of the gallery, and the `url` field is the URL of the gallery's index file, for example: ```json GALLERIES=[{"name":"", "url":" Date: Thu, 20 Nov 2025 19:14:13 +0100 Subject: [PATCH 7/9] Small fixups Signed-off-by: Ettore Di Giacinto --- core/http/routes/ui.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index de329c11de06..6ef56550564d 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -22,15 +22,17 @@ func RegisterUIRoutes(app *echo.Echo, app.GET("/", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps)) app.GET("/manage", localai.WelcomeEndpoint(appConfig, cl, ml, processingOps)) - - // Settings page - app.GET("/settings", func(c echo.Context) error { - summary := map[string]interface{}{ - "Title": "LocalAI - Settings", - "BaseURL": middleware.BaseURL(c), - } - return c.Render(200, "views/settings", summary) - }) + + if !appConfig.DisableRuntimeSettings { + // Settings page + app.GET("/settings", func(c echo.Context) error { + summary := map[string]interface{}{ + "Title": "LocalAI - Settings", + "BaseURL": middleware.BaseURL(c), + } + return c.Render(200, "views/settings", summary) + }) + } // P2P app.GET("/p2p/", func(c echo.Context) error { From a70bff5c5d3a17714f09093fb181070fb0c86ddc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 19:32:29 +0100 Subject: [PATCH 8/9] show MCP toggle 
in index Signed-off-by: Ettore Di Giacinto --- core/http/static/chat.js | 9 ++++ core/http/views/chat.html | 19 ++++++- core/http/views/index.html | 105 +++++++++++++++++++++++++++++++------ 3 files changed, 116 insertions(+), 17 deletions(-) diff --git a/core/http/static/chat.js b/core/http/static/chat.js index fea4b1efac95..62270ddb6682 100644 --- a/core/http/static/chat.js +++ b/core/http/static/chat.js @@ -1382,6 +1382,12 @@ document.addEventListener('DOMContentLoaded', function() { if (chatData) { try { const data = JSON.parse(chatData); + + // Set MCP mode if provided + if (data.mcpMode === true && Alpine.store("chat")) { + Alpine.store("chat").mcpMode = true; + } + const input = document.getElementById('input'); if (input && data.message) { @@ -1417,6 +1423,9 @@ document.addEventListener('DOMContentLoaded', function() { processAndSendMessage(input.value); } }, 500); + } else { + // No message, but might have mcpMode - clear localStorage + localStorage.removeItem('localai_index_chat_data'); } } catch (error) { console.error('Error processing chat data from index:', error); diff --git a/core/http/views/chat.html b/core/http/views/chat.html index dd917612adc0..fadf57d85950 100644 --- a/core/http/views/chat.html +++ b/core/http/views/chat.html @@ -44,8 +44,25 @@ // Function to initialize store function __initChatStore() { if (!window.Alpine) return; + + // Check for MCP mode from localStorage (set by index page) + // Note: We don't clear localStorage here - chat.js will handle that after reading all data + let initialMcpMode = false; + try { + const chatData = localStorage.getItem('localai_index_chat_data'); + if (chatData) { + const parsed = JSON.parse(chatData); + if (parsed.mcpMode === true) { + initialMcpMode = true; + } + } + } catch (e) { + console.error('Error reading MCP mode from localStorage:', e); + } + if (Alpine.store("chat")) { Alpine.store("chat").contextSize = __chatContextSize; + Alpine.store("chat").mcpMode = initialMcpMode; return; } @@ -53,7 
+70,7 @@ history: [], languages: [undefined], systemPrompt: "", - mcpMode: false, + mcpMode: initialMcpMode, contextSize: __chatContextSize, tokenUsage: { promptTokens: 0, diff --git a/core/http/views/index.html b/core/http/views/index.html index 460ff322fe90..d4f1b87fafef 100644 --- a/core/http/views/index.html +++ b/core/http/views/index.html @@ -128,6 +128,9 @@

audioFiles: [], textFiles: [], attachedFiles: [], + mcpMode: false, + mcpAvailable: false, + mcpModels: {}, currentPlaceholder: 'Send a message...', placeholderIndex: 0, charIndex: 0, @@ -163,6 +166,8 @@

init() { window.currentPlaceholderText = this.currentPlaceholder; this.startTypingAnimation(); + // Build MCP models map from data attributes + this.buildMCPModelsMap(); // Select first model by default this.$nextTick(() => { const select = this.$el.querySelector('select'); @@ -171,9 +176,43 @@

const firstModelOption = select.options[1]; if (firstModelOption && firstModelOption.value) { this.selectedModel = firstModelOption.value; + this.checkMCPAvailability(); } } }); + // Watch for changes to selectedModel to update MCP availability + this.$watch('selectedModel', () => { + this.checkMCPAvailability(); + }); + }, + buildMCPModelsMap() { + const select = this.$el.querySelector('select'); + if (!select) return; + this.mcpModels = {}; + for (let i = 0; i < select.options.length; i++) { + const option = select.options[i]; + if (option.value) { + const hasMcpAttr = option.getAttribute('data-has-mcp'); + this.mcpModels[option.value] = hasMcpAttr === 'true'; + } + } + // Debug: uncomment to see the MCP models map + // console.log('MCP Models Map:', this.mcpModels); + }, + checkMCPAvailability() { + if (!this.selectedModel) { + this.mcpAvailable = false; + this.mcpMode = false; + return; + } + // Check MCP availability from the map + const hasMCP = this.mcpModels[this.selectedModel] === true; + this.mcpAvailable = hasMCP; + // Debug: uncomment to see what's happening + // console.log('MCP Check:', { model: this.selectedModel, hasMCP, mcpAvailable: this.mcpAvailable, map: this.mcpModels }); + if (!hasMCP) { + this.mcpMode = false; + } }, startTypingAnimation() { if (this.isTyping) return; @@ -268,24 +307,50 @@

} } }"> - +
- + + {{ range .ModelsConfig }} + {{ $cfg := . }} + {{ $hasMCP := or (ne $cfg.MCP.Servers "") (ne $cfg.MCP.Stdio "") }} + {{ range .KnownUsecaseStrings }} + {{ if eq . "FLAG_CHAT" }} + + {{ end }} {{ end }} - {{ end }} - {{ end }} - + {{ end }} + + + +
+ + MCP + +
+
+ + +
+
+ +

Non-streaming mode active. Responses may take longer to process.

+
+
@@ -476,12 +541,20 @@

return; } + // Get MCP mode from checkbox (if available) + let mcpMode = false; + const mcpToggle = document.getElementById('index_mcp_toggle'); + if (mcpToggle && mcpToggle.checked) { + mcpMode = true; + } + // Store message and files in localStorage for chat page to pick up const chatData = { message: message, imageFiles: [], audioFiles: [], - textFiles: [] + textFiles: [], + mcpMode: mcpMode }; // Convert files to base64 for storage From 3c2fb3e6f15780f1f64edc1a38eaa19de3f499e6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 20 Nov 2025 21:51:40 +0100 Subject: [PATCH 9/9] Drop context default Signed-off-by: Ettore Di Giacinto --- core/config/application_config.go | 1 - core/http/endpoints/localai/import_model.go | 2 +- core/http/endpoints/localai/settings.go | 1 - core/http/views/settings.html | 6 +++--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/core/config/application_config.go b/core/config/application_config.go index 39caa527f008..9a9a8171c1e8 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -78,7 +78,6 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig { opt := &ApplicationConfig{ Context: context.Background(), UploadLimitMB: 15, - ContextSize: 512, Debug: true, } for _, oo := range o { diff --git a/core/http/endpoints/localai/import_model.go b/core/http/endpoints/localai/import_model.go index 77abcdfb60b3..9d8926c0a228 100644 --- a/core/http/endpoints/localai/import_model.go +++ b/core/http/endpoints/localai/import_model.go @@ -145,7 +145,7 @@ func ImportModelEndpoint(cl *config.ModelConfigLoader, appConfig *config.Applica } // Set defaults - modelConfig.SetDefaults() + modelConfig.SetDefaults(appConfig.ToConfigLoaderOptions()...) 
// Validate the configuration if valid, _ := modelConfig.Validate(); !valid { diff --git a/core/http/endpoints/localai/settings.go b/core/http/endpoints/localai/settings.go index 5145796666c3..62f198a9d049 100644 --- a/core/http/endpoints/localai/settings.go +++ b/core/http/endpoints/localai/settings.go @@ -115,7 +115,6 @@ func GetSettingsEndpoint(app *application.Application) echo.HandlerFunc { } settings.WatchdogIdleTimeout = &idleTimeout settings.WatchdogBusyTimeout = &busyTimeout - return c.JSON(http.StatusOK, settings) } } diff --git a/core/http/views/settings.html b/core/http/views/settings.html index b27c705579d9..b6acaea0dd6e 100644 --- a/core/http/views/settings.html +++ b/core/http/views/settings.html @@ -442,7 +442,7 @@

single_backend: false, parallel_backend_requests: false, threads: 0, - context_size: 512, + context_size: 0, f16: false, debug: false, cors: false, @@ -479,7 +479,7 @@

single_backend: data.single_backend, parallel_backend_requests: data.parallel_backend_requests, threads: data.threads || 0, - context_size: data.context_size || 512, + context_size: data.context_size || 0, f16: data.f16 || false, debug: data.debug || false, cors: data.cors || false, @@ -545,7 +545,7 @@

payload.threads = parseInt(this.settings.threads) || 0; } if (this.settings.context_size !== undefined) { - payload.context_size = parseInt(this.settings.context_size) || 512; + payload.context_size = parseInt(this.settings.context_size) || 0; } if (this.settings.f16 !== undefined) { payload.f16 = this.settings.f16;