From 0f49c225727938baa07d81cefc8a1f93f4366a77 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Mon, 10 Nov 2025 13:43:15 +0100
Subject: [PATCH 1/7] feat(ui): show stats in chat, improve style

Signed-off-by: Ettore Di Giacinto
---
 core/http/routes/ui.go    |  15 +-
 core/http/static/chat.js  | 115 +++++++++-
 core/http/views/chat.html | 471 ++++++++++++++++++++++++++++----------
 3 files changed, 470 insertions(+), 131 deletions(-)

diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go
index c781bd88b021..8d8b2cebaf7f 100644
--- a/core/http/routes/ui.go
+++ b/core/http/routes/ui.go
@@ -91,11 +91,15 @@ func RegisterUIRoutes(app *fiber.App,
 	}
 
 	title := "LocalAI - Chat"
+	var modelContextSize *int
 
 	for _, b := range modelConfigs {
 		if b.HasUsecases(config.FLAG_CHAT) {
 			modelThatCanBeUsed = b.Name
 			title = "LocalAI - Chat with " + modelThatCanBeUsed
+			if b.LLMConfig.ContextSize != nil {
+				modelContextSize = b.LLMConfig.ContextSize
+			}
 			break
 		}
 	}
@@ -107,6 +111,7 @@ func RegisterUIRoutes(app *fiber.App,
 		"GalleryConfig": galleryConfigs,
 		"ModelsConfig":  modelConfigs,
 		"Model":         modelThatCanBeUsed,
+		"ContextSize":   modelContextSize,
 		"Version":       internal.PrintableVersion(),
 	}
 
@@ -120,6 +125,8 @@ func RegisterUIRoutes(app *fiber.App,
 
 		modelsWithoutConfig, _ := services.ListModels(cl, ml, config.NoFilterFn, services.LOOSE_ONLY)
 		galleryConfigs := map[string]*gallery.ModelConfig{}
+		modelName := c.Params("model")
+		var modelContextSize *int
 
 		for _, m := range modelConfigs {
 			cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name)
@@ -127,15 +134,19 @@ func RegisterUIRoutes(app *fiber.App,
 				continue
 			}
 			galleryConfigs[m.Name] = cfg
+			if m.Name == modelName && m.LLMConfig.ContextSize != nil {
+				modelContextSize = m.LLMConfig.ContextSize
+			}
 		}
 
 		summary := fiber.Map{
-			"Title":               "LocalAI - Chat with " + c.Params("model"),
+			"Title":               "LocalAI - Chat with " + modelName,
 			"BaseURL":             utils.BaseURL(c),
 			"ModelsConfig":        modelConfigs,
 			"GalleryConfig":       galleryConfigs,
 			"ModelsWithoutConfig": modelsWithoutConfig,
-			"Model":               c.Params("model"),
+			"Model":               modelName,
+			"ContextSize":         modelContextSize,
 			"Version":             internal.PrintableVersion(),
 		}
 
diff --git a/core/http/static/chat.js b/core/http/static/chat.js
index 4517b0baf5bf..f1fe1290b2e6 100644
--- a/core/http/static/chat.js
+++ b/core/http/static/chat.js
@@ -34,15 +34,16 @@ let currentReader = null;
 function toggleLoader(show) {
   const sendButton = document.getElementById('send-button');
   const stopButton = document.getElementById('stop-button');
+  const headerLoadingIndicator = document.getElementById('header-loading-indicator');
   if (show) {
     sendButton.style.display = 'none';
     stopButton.style.display = 'block';
-    document.getElementById("input").disabled = true;
+    if (headerLoadingIndicator) headerLoadingIndicator.style.display = 'block';
   } else {
-    document.getElementById("input").disabled = false;
     sendButton.style.display = 'block';
     stopButton.style.display = 'none';
+    if (headerLoadingIndicator) headerLoadingIndicator.style.display = 'none';
     currentAbortController = null;
     currentReader = null;
   }
 }
@@ -171,9 +172,30 @@ function readInputFile() {
 
 function submitPrompt(event) {
   event.preventDefault();
+
+  const input = document.getElementById("input");
+  if (!input) return;
+
+  const inputValue = input.value;
+  if (!inputValue.trim()) return; // Don't send empty messages
+
+  // If already processing, abort the current request and send the new one
+  if (currentAbortController || currentReader) {
+    // Abort current request
+    stopRequest();
+    // Small delay to ensure cleanup completes
+    setTimeout(() => {
+      // Continue with new request
+      processAndSendMessage(inputValue);
+    }, 100);
+    return;
+  }
+
+  processAndSendMessage(inputValue);
+}
 
-  const input = document.getElementById("input").value;
-  let fullInput = input;
+function processAndSendMessage(inputValue) {
+  let fullInput = inputValue;
 
   // If there are file contents, append them to the input for the LLM
   if (fileContents.length > 0) {
@@ -184,7 +206,7 @@
   }
 
   // Show file icons in chat if there are files
-  let displayContent = input;
+  let displayContent = inputValue;
   if (currentFileNames.length > 0) {
     displayContent += "\n\n";
     currentFileNames.forEach(fileName => {
@@ -201,7 +223,8 @@
     history[history.length - 1].content = fullInput;
   }
 
-  document.getElementById("input").value = "";
+  const input = document.getElementById("input");
+  if (input) input.value = "";
   const systemPrompt = localStorage.getItem("system_prompt");
   Alpine.nextTick(() => { document.getElementById('messages').scrollIntoView(false); });
   promptGPT(systemPrompt, fullInput);
@@ -242,6 +265,12 @@ function readInputAudio() {
 async function promptGPT(systemPrompt, input) {
   const model = document.getElementById("chat-model").value;
   const mcpMode = Alpine.store("chat").mcpMode;
+
+  // Reset current request usage tracking for new request
+  if (Alpine.store("chat")) {
+    Alpine.store("chat").tokenUsage.currentRequest = null;
+  }
+
   toggleLoader(true);
 
   messages = Alpine.store("chat").messages();
@@ -373,6 +402,12 @@ async function promptGPT(systemPrompt, input) {
       // Handle MCP non-streaming response
       try {
         const data = await response.json();
+
+        // Update token usage if present
+        if (data.usage) {
+          Alpine.store("chat").updateTokenUsage(data.usage);
+        }
+
         // MCP endpoint returns content in choices[0].text, not choices[0].message.content
         const content = data.choices[0]?.text || "";
 
@@ -456,6 +491,12 @@ async function promptGPT(systemPrompt, input) {
           if (line.startsWith("data: ")) {
             try {
               const jsonData = JSON.parse(line.substring(6));
+
+              // Update token usage if present
+              if (jsonData.usage) {
+                Alpine.store("chat").updateTokenUsage(jsonData.usage);
+              }
+
               const token = jsonData.choices[0].delta.content;
 
               if (token) {
@@ -568,14 +609,71 @@ marked.setOptions({
   },
 });
 
+// Alpine store is now initialized in chat.html inline script to ensure it's available before Alpine processes the DOM
+// Only initialize if not already initialized (to avoid duplicate initialization)
 document.addEventListener("alpine:init", () => {
-  Alpine.store("chat", {
+  // Check if store already exists (initialized in chat.html)
+  if (!Alpine.store("chat")) {
+    // Fallback initialization (should not be needed if chat.html loads correctly)
+    Alpine.store("chat", {
     history: [],
     languages: [undefined],
     systemPrompt: "",
     mcpMode: false,
+    contextSize: null,
+    tokenUsage: {
+      promptTokens: 0,
+      completionTokens: 0,
+      totalTokens: 0,
+      currentRequest: null
+    },
     clear() {
       this.history.length = 0;
+      this.tokenUsage = {
+        promptTokens: 0,
+        completionTokens: 0,
+        totalTokens: 0,
+        currentRequest: null
+      };
+    },
+    updateTokenUsage(usage) {
+      // Usage values in streaming responses are cumulative totals for the current request
+      // We track session totals separately and only update when we see new (higher) values
+      if (usage) {
+        const currentRequest = this.tokenUsage.currentRequest || {
+          promptTokens: 0,
+          completionTokens: 0,
+          totalTokens: 0
+        };
+
+        // Check if this is a new/updated usage (values increased)
+        const isNewUsage =
+          (usage.prompt_tokens !== undefined && usage.prompt_tokens > currentRequest.promptTokens) ||
+          (usage.completion_tokens !== undefined && usage.completion_tokens > currentRequest.completionTokens) ||
+          (usage.total_tokens !== undefined && usage.total_tokens > currentRequest.totalTokens);
+
+        if (isNewUsage) {
+          // Update session totals: subtract old request usage, add new
+          this.tokenUsage.promptTokens = this.tokenUsage.promptTokens - currentRequest.promptTokens + (usage.prompt_tokens || 0);
+          this.tokenUsage.completionTokens = this.tokenUsage.completionTokens - currentRequest.completionTokens + (usage.completion_tokens || 0);
+          this.tokenUsage.totalTokens = this.tokenUsage.totalTokens - currentRequest.totalTokens + (usage.total_tokens || 0);
+
+          // Store current request usage
+          this.tokenUsage.currentRequest = {
+            promptTokens: usage.prompt_tokens || 0,
+            completionTokens: usage.completion_tokens || 0,
+            totalTokens: usage.total_tokens || 0
+          };
+        }
+      }
+    },
+    getRemainingTokens() {
+      if (!this.contextSize) return null;
+      return Math.max(0, this.contextSize - this.tokenUsage.totalTokens);
+    },
+    getContextUsagePercent() {
+      if (!this.contextSize) return null;
+      return Math.min(100, (this.tokenUsage.totalTokens / this.contextSize) * 100);
     },
     add(role, content, image, audio) {
       const N = this.history.length - 1;
@@ -640,5 +738,6 @@ document.addEventListener("alpine:init", () => {
         audio: message.audio,
       }));
     },
-  });
+    });
+  }
 });
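
[Note on the usage accounting above: streamed `usage` chunks report cumulative totals for the current request, so updateTokenUsage replaces the previous chunk's contribution to the session totals rather than summing chunks. A stand-alone sketch of that arithmetic outside Alpine; the bare tokenUsage object and the console.log check are illustrative stand-ins, not part of the patch:

const tokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0, currentRequest: null };

function updateTokenUsage(usage) {
  // Baseline: what the current request has already contributed to the session totals.
  const cur = tokenUsage.currentRequest || { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  const isNewUsage =
    (usage.prompt_tokens !== undefined && usage.prompt_tokens > cur.promptTokens) ||
    (usage.completion_tokens !== undefined && usage.completion_tokens > cur.completionTokens) ||
    (usage.total_tokens !== undefined && usage.total_tokens > cur.totalTokens);
  if (!isNewUsage) return;
  // Swap the old per-request contribution for the new cumulative one.
  tokenUsage.promptTokens += (usage.prompt_tokens || 0) - cur.promptTokens;
  tokenUsage.completionTokens += (usage.completion_tokens || 0) - cur.completionTokens;
  tokenUsage.totalTokens += (usage.total_tokens || 0) - cur.totalTokens;
  tokenUsage.currentRequest = {
    promptTokens: usage.prompt_tokens || 0,
    completionTokens: usage.completion_tokens || 0,
    totalTokens: usage.total_tokens || 0,
  };
}

// Request one streams two cumulative usage chunks...
updateTokenUsage({ prompt_tokens: 12, completion_tokens: 5, total_tokens: 17 });
updateTokenUsage({ prompt_tokens: 12, completion_tokens: 20, total_tokens: 32 });
// ...then promptGPT resets currentRequest before request two's chunks arrive.
tokenUsage.currentRequest = null;
updateTokenUsage({ prompt_tokens: 40, completion_tokens: 8, total_tokens: 48 });
console.log(tokenUsage.totalTokens); // 80 = 32 (request one) + 48 (request two)

The subtract-then-add form keeps the session totals correct however many cumulative usage chunks a backend emits per stream.]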
diff --git a/core/http/views/chat.html b/core/http/views/chat.html
index 86338402f330..f9ec82835de3 100644
--- a/core/http/views/chat.html
+++ b/core/http/views/chat.html
[The chat.html hunks were lost in extraction: the HTML markup was stripped, leaving only text fragments. The surviving fragments indicate the following changes:
 @@ -28,12 +28,167 @@ after {{template "views/partials/head" .}}, adds inline script and style blocks that pre-initialize the Alpine "chat" store and define the reworked dark theme, before {{ $allGalleryConfigs:=.GalleryConfig }} / {{ $model:=.Model}}.
 @@ -141,21 +296,91 @@ restyles the Documentation and Browse Model links and adds the header token-usage panel (Prompt / Completion / Total) plus a header loading indicator.
 @@ -167,21 +392,21 @@ restyles the Agentic MCP Mode toggle shown when $modelConfig.MCP.Servers or $modelConfig.MCP.Stdio is set.
 @@ -191,9 +416,9 @@ and @@ -207,26 +432,26 @@ restyle the "Non-streaming Mode Active" notice ("Responses will be processed in full before display. This may take significantly longer (up to 5 minutes), especially on CPU-only systems.") and the "System prompt updated!" toast (bg-gray-700 → bg-[#1E293B] with a #38BDF8/20 border).
 @@ -243,38 +468,45 @@ reworks the sidebar-aware header ("Chat {{ if .Model }} with {{.Model}} {{ end }}", gallery icon) around the new layout.
 @@ -285,8 +517,8 @@ adjusts the welcome text ("Start chatting with the AI by typing a prompt in the input field below and pressing Enter.") and the bullets for the image, audio, and text/markdown/PDF upload icons.]
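
[The inline initializer that the chat.js comments reference ("Alpine store is now initialized in chat.html inline script") sits in the unrecovered @@ -28,12 +28,167 @@ hunk. A minimal sketch of what it plausibly contains; the exact markup, template expression, and method bodies are assumptions beyond what the surviving code confirms. It seeds the store, including contextSize from the ContextSize value ui.go now passes, before Alpine processes the DOM, so the chat.js fallback should never run:

<script>
  // Hypothetical reconstruction, not the actual hunk: registered before chat.js loads,
  // so Alpine.store("chat") already exists when the fallback checks for it.
  document.addEventListener("alpine:init", () => {
    Alpine.store("chat", {
      history: [],
      languages: [undefined],
      systemPrompt: "",
      mcpMode: false,
      // Rendered server-side; null when the model config carries no context_size
      // (modelContextSize is a *int in ui.go, nil unless the config sets it).
      contextSize: {{ if .ContextSize }}{{ .ContextSize }}{{ else }}null{{ end }},
      tokenUsage: { promptTokens: 0, completionTokens: 0, totalTokens: 0, currentRequest: null },
      // clear(), updateTokenUsage(), getRemainingTokens(), getContextUsagePercent()
      // and the history helpers would mirror the chat.js fallback verbatim.
    });
  });
</script>]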