44 changes: 19 additions & 25 deletions swagger/docs.go
@@ -842,15 +842,15 @@ const docTemplate = `{
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/localai.vramEstimateRequest"
"$ref": "#/definitions/modeladmin.VRAMRequest"
}
}
],
"responses": {
"200": {
"description": "VRAM estimate",
"schema": {
"$ref": "#/definitions/localai.vramEstimateResponse"
"$ref": "#/definitions/modeladmin.VRAMResponse"
}
}
}
@@ -3044,36 +3044,45 @@ const docTemplate = `{
}
}
},
"localai.vramEstimateRequest": {
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"modeladmin.VRAMRequest": {
"type": "object",
"properties": {
"context_size": {
"description": "context length to estimate for (default 8192)",
"type": "integer"
},
"gpu_layers": {
"description": "number of layers to offload to GPU (0 = all)",
"type": "integer"
},
"kv_quant_bits": {
"description": "KV cache quantization bits (0 = fp16)",
"type": "integer"
},
"model": {
"description": "model name (must be installed)",
"type": "string"
}
}
},
"localai.vramEstimateResponse": {
"modeladmin.VRAMResponse": {
"type": "object",
"properties": {
"context_note": {
"description": "note when context_size was defaulted",
"type": "string"
},
"model_max_context": {
"description": "model's trained maximum context length",
"type": "integer"
},
"sizeBytes": {
@@ -3094,21 +3103,6 @@ const docTemplate = `{
}
}
},
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"proto.MemoryUsageData": {
"type": "object",
"properties": {
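For reference, a minimal sketch of what the renamed types might look like in Go after this change. The package name (`modeladmin`) and the JSON property names and descriptions are taken from the `$ref`s and schema properties in the diff above; the Go field names, types, and struct tags are assumptions, not the actual source.

```go
// Sketch of the types behind #/definitions/modeladmin.VRAMRequest and
// #/definitions/modeladmin.VRAMResponse (previously localai.vramEstimateRequest
// and localai.vramEstimateResponse). Reconstructed from the generated schema;
// only the JSON property names and descriptions come from the diff.
package modeladmin

// VRAMRequest mirrors #/definitions/modeladmin.VRAMRequest.
type VRAMRequest struct {
	Model       string `json:"model"`         // model name (must be installed)
	ContextSize int    `json:"context_size"`  // context length to estimate for (default 8192)
	GPULayers   int    `json:"gpu_layers"`    // number of layers to offload to GPU (0 = all)
	KVQuantBits int    `json:"kv_quant_bits"` // KV cache quantization bits (0 = fp16)
}

// VRAMResponse mirrors #/definitions/modeladmin.VRAMResponse, limited to the
// properties visible in this diff; the collapsed hunk hides the remaining
// fields (e.g. the human-readable VRAM string), so they are omitted here.
type VRAMResponse struct {
	ContextNote     string `json:"context_note"`      // note when context_size was defaulted
	ModelMaxContext int    `json:"model_max_context"` // model's trained maximum context length
	SizeBytes       int64  `json:"sizeBytes"`         // total model weight size in bytes
}
```

The relocated `model.BackendLogLine` definition (stream, text, timestamp, all strings) is unchanged in content; it only moves to keep the generated definitions in alphabetical order.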
44 changes: 19 additions & 25 deletions swagger/swagger.json
@@ -839,15 +839,15 @@
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/localai.vramEstimateRequest"
"$ref": "#/definitions/modeladmin.VRAMRequest"
}
}
],
"responses": {
"200": {
"description": "VRAM estimate",
"schema": {
"$ref": "#/definitions/localai.vramEstimateResponse"
"$ref": "#/definitions/modeladmin.VRAMResponse"
}
}
}
@@ -3041,36 +3041,45 @@
}
}
},
"localai.vramEstimateRequest": {
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"modeladmin.VRAMRequest": {
"type": "object",
"properties": {
"context_size": {
"description": "context length to estimate for (default 8192)",
"type": "integer"
},
"gpu_layers": {
"description": "number of layers to offload to GPU (0 = all)",
"type": "integer"
},
"kv_quant_bits": {
"description": "KV cache quantization bits (0 = fp16)",
"type": "integer"
},
"model": {
"description": "model name (must be installed)",
"type": "string"
}
}
},
"localai.vramEstimateResponse": {
"modeladmin.VRAMResponse": {
"type": "object",
"properties": {
"context_note": {
"description": "note when context_size was defaulted",
"type": "string"
},
"model_max_context": {
"description": "model's trained maximum context length",
"type": "integer"
},
"sizeBytes": {
@@ -3091,21 +3100,6 @@
}
}
},
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"proto.MemoryUsageData": {
"type": "object",
"properties": {
34 changes: 14 additions & 20 deletions swagger/swagger.yaml
@@ -313,28 +313,32 @@ definitions:
>= 1.
type: integer
type: object
localai.vramEstimateRequest:
model.BackendLogLine:
properties:
stream:
description: '"stdout" or "stderr"'
type: string
text:
type: string
timestamp:
type: string
type: object
modeladmin.VRAMRequest:
properties:
context_size:
description: context length to estimate for (default 8192)
type: integer
gpu_layers:
description: number of layers to offload to GPU (0 = all)
type: integer
kv_quant_bits:
description: KV cache quantization bits (0 = fp16)
type: integer
model:
description: model name (must be installed)
type: string
type: object
localai.vramEstimateResponse:
modeladmin.VRAMResponse:
properties:
context_note:
description: note when context_size was defaulted
type: string
model_max_context:
description: model's trained maximum context length
type: integer
sizeBytes:
description: total model weight size in bytes
@@ -349,16 +353,6 @@ definitions:
description: human-readable VRAM (e.g. "6.1 GB")
type: string
type: object
model.BackendLogLine:
properties:
stream:
description: '"stdout" or "stderr"'
type: string
text:
type: string
timestamp:
type: string
type: object
proto.MemoryUsageData:
properties:
breakdown:
@@ -2710,14 +2704,14 @@
name: request
required: true
schema:
$ref: '#/definitions/localai.vramEstimateRequest'
$ref: '#/definitions/modeladmin.VRAMRequest'
produces:
- application/json
responses:
"200":
description: VRAM estimate
schema:
$ref: '#/definitions/localai.vramEstimateResponse'
$ref: '#/definitions/modeladmin.VRAMResponse'
summary: Estimate VRAM usage for a model
tags:
- config
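A hedged usage sketch of the endpoint documented above: the request body follows the modeladmin.VRAMRequest schema from this diff, but the URL path is not visible in any of the shown hunks, so the route used here is purely a placeholder and must be checked against the actual route registration.

```go
// Hypothetical client call against the VRAM-estimate endpoint documented above.
// The JSON body matches modeladmin.VRAMRequest; the host and path are
// placeholders, since the route itself is outside the visible part of this diff.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body, _ := json.Marshal(map[string]any{
		"model":         "qwen2.5-7b-instruct", // placeholder: any installed model name
		"context_size":  8192,                  // default per the schema description
		"gpu_layers":    0,                     // 0 = offload all layers
		"kv_quant_bits": 0,                     // 0 = fp16 KV cache
	})

	// Placeholder URL: substitute the real LocalAI host and route.
	resp, err := http.Post("http://localhost:8080/models/vram-estimate",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out map[string]any // loosely typed; see the VRAMResponse sketch earlier
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```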