44 changes: 19 additions & 25 deletions swagger/docs.go
@@ -842,15 +842,15 @@ const docTemplate = `{
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/localai.vramEstimateRequest"
"$ref": "#/definitions/modeladmin.VRAMRequest"
}
}
],
"responses": {
"200": {
"description": "VRAM estimate",
"schema": {
"$ref": "#/definitions/localai.vramEstimateResponse"
"$ref": "#/definitions/modeladmin.VRAMResponse"
}
}
}
@@ -3044,36 +3044,45 @@ const docTemplate = `{
}
}
},
"localai.vramEstimateRequest": {
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"modeladmin.VRAMRequest": {
"type": "object",
"properties": {
"context_size": {
"description": "context length to estimate for (default 8192)",
"type": "integer"
},
"gpu_layers": {
"description": "number of layers to offload to GPU (0 = all)",
"type": "integer"
},
"kv_quant_bits": {
"description": "KV cache quantization bits (0 = fp16)",
"type": "integer"
},
"model": {
"description": "model name (must be installed)",
"type": "string"
}
}
},
"localai.vramEstimateResponse": {
"modeladmin.VRAMResponse": {
"type": "object",
"properties": {
"context_note": {
"description": "note when context_size was defaulted",
"type": "string"
},
"model_max_context": {
"description": "model's trained maximum context length",
"type": "integer"
},
"sizeBytes": {
@@ -3094,21 +3103,6 @@ const docTemplate = `{
}
}
},
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"proto.MemoryUsageData": {
"type": "object",
"properties": {
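For reference, a minimal sketch of what the renamed types might look like in Go after this change. The package name (`modeladmin`) and the JSON property names and descriptions are taken from the `$ref`s and schema properties in the diff above; the Go field names, types, and struct tags are assumptions, not the actual source.

```go
// Sketch of the types behind #/definitions/modeladmin.VRAMRequest and
// #/definitions/modeladmin.VRAMResponse (previously localai.vramEstimateRequest
// and localai.vramEstimateResponse). Reconstructed from the generated schema;
// only the JSON property names and descriptions come from the diff.
package modeladmin

// VRAMRequest mirrors #/definitions/modeladmin.VRAMRequest.
type VRAMRequest struct {
	Model       string `json:"model"`         // model name (must be installed)
	ContextSize int    `json:"context_size"`  // context length to estimate for (default 8192)
	GPULayers   int    `json:"gpu_layers"`    // number of layers to offload to GPU (0 = all)
	KVQuantBits int    `json:"kv_quant_bits"` // KV cache quantization bits (0 = fp16)
}

// VRAMResponse mirrors #/definitions/modeladmin.VRAMResponse, limited to the
// properties visible in this diff; the collapsed hunk hides the remaining
// fields (e.g. the human-readable VRAM string), so they are omitted here.
type VRAMResponse struct {
	ContextNote     string `json:"context_note"`      // note when context_size was defaulted
	ModelMaxContext int    `json:"model_max_context"` // model's trained maximum context length
	SizeBytes       int64  `json:"sizeBytes"`         // total model weight size in bytes
}
```

The relocated `model.BackendLogLine` definition (stream, text, timestamp, all strings) is unchanged in content; it only moves to keep the generated definitions in alphabetical order.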
44 changes: 19 additions & 25 deletions swagger/swagger.json
@@ -839,15 +839,15 @@
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/localai.vramEstimateRequest"
"$ref": "#/definitions/modeladmin.VRAMRequest"
}
}
],
"responses": {
"200": {
"description": "VRAM estimate",
"schema": {
"$ref": "#/definitions/localai.vramEstimateResponse"
"$ref": "#/definitions/modeladmin.VRAMResponse"
}
}
}
@@ -3041,36 +3041,45 @@
}
}
},
"localai.vramEstimateRequest": {
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"modeladmin.VRAMRequest": {
"type": "object",
"properties": {
"context_size": {
"description": "context length to estimate for (default 8192)",
"type": "integer"
},
"gpu_layers": {
"description": "number of layers to offload to GPU (0 = all)",
"type": "integer"
},
"kv_quant_bits": {
"description": "KV cache quantization bits (0 = fp16)",
"type": "integer"
},
"model": {
"description": "model name (must be installed)",
"type": "string"
}
}
},
"localai.vramEstimateResponse": {
"modeladmin.VRAMResponse": {
"type": "object",
"properties": {
"context_note": {
"description": "note when context_size was defaulted",
"type": "string"
},
"model_max_context": {
"description": "model's trained maximum context length",
"type": "integer"
},
"sizeBytes": {
@@ -3091,21 +3100,6 @@
}
}
},
"model.BackendLogLine": {
"type": "object",
"properties": {
"stream": {
"description": "\"stdout\" or \"stderr\"",
"type": "string"
},
"text": {
"type": "string"
},
"timestamp": {
"type": "string"
}
}
},
"proto.MemoryUsageData": {
"type": "object",
"properties": {
34 changes: 14 additions & 20 deletions swagger/swagger.yaml
@@ -313,28 +313,32 @@ definitions:
>= 1.
type: integer
type: object
localai.vramEstimateRequest:
model.BackendLogLine:
properties:
stream:
description: '"stdout" or "stderr"'
type: string
text:
type: string
timestamp:
type: string
type: object
modeladmin.VRAMRequest:
properties:
context_size:
description: context length to estimate for (default 8192)
type: integer
gpu_layers:
description: number of layers to offload to GPU (0 = all)
type: integer
kv_quant_bits:
description: KV cache quantization bits (0 = fp16)
type: integer
model:
description: model name (must be installed)
type: string
type: object
localai.vramEstimateResponse:
modeladmin.VRAMResponse:
properties:
context_note:
description: note when context_size was defaulted
type: string
model_max_context:
description: model's trained maximum context length
type: integer
sizeBytes:
description: total model weight size in bytes
@@ -349,16 +353,6 @@ definitions:
description: human-readable VRAM (e.g. "6.1 GB")
type: string
type: object
model.BackendLogLine:
properties:
stream:
description: '"stdout" or "stderr"'
type: string
text:
type: string
timestamp:
type: string
type: object
proto.MemoryUsageData:
properties:
breakdown:
@@ -2710,14 +2704,14 @@
name: request
required: true
schema:
$ref: '#/definitions/localai.vramEstimateRequest'
$ref: '#/definitions/modeladmin.VRAMRequest'
produces:
- application/json
responses:
"200":
description: VRAM estimate
schema:
$ref: '#/definitions/localai.vramEstimateResponse'
$ref: '#/definitions/modeladmin.VRAMResponse'
summary: Estimate VRAM usage for a model
tags:
- config
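A hedged usage sketch of the endpoint documented above: the request body follows the modeladmin.VRAMRequest schema from this diff, but the URL path is not visible in any of the shown hunks, so the route used here is purely a placeholder and must be checked against the actual route registration.

```go
// Hypothetical client call against the VRAM-estimate endpoint documented above.
// The JSON body matches modeladmin.VRAMRequest; the host and path are
// placeholders, since the route itself is outside the visible part of this diff.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body, _ := json.Marshal(map[string]any{
		"model":         "qwen2.5-7b-instruct", // placeholder: any installed model name
		"context_size":  8192,                  // default per the schema description
		"gpu_layers":    0,                     // 0 = offload all layers
		"kv_quant_bits": 0,                     // 0 = fp16 KV cache
	})

	// Placeholder URL: substitute the real LocalAI host and route.
	resp, err := http.Post("http://localhost:8080/models/vram-estimate",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out map[string]any // loosely typed; see the VRAMResponse sketch earlier
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```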