Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion core/backend/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func gRPCPredictOpts(c config.ModelConfig, modelPath string) *pb.PredictOptions
MinP: float32(*c.MinP),
Tokens: int32(*c.Maxtokens),
Threads: int32(*c.Threads),
PromptCacheAll: c.PromptCacheAll,
PromptCacheAll: *c.PromptCacheAll,
PromptCacheRO: c.PromptCacheRO,
PromptCachePath: promptCachePath,
F16KV: *c.F16,
Expand Down
32 changes: 32 additions & 0 deletions core/config/hooks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,36 @@ var _ = Describe("Backend hooks and parser defaults", func() {
Expect(cfg.EngineArgs["enable_chunked_prefill"]).To(Equal(true))
})
})

Context("PromptCacheAll default", func() {
It("defaults to true when omitted from YAML", func() {
cfg := &ModelConfig{}
cfg.SetDefaults()

Expect(cfg.PromptCacheAll).NotTo(BeNil())
Expect(*cfg.PromptCacheAll).To(BeTrue())
})

It("preserves an explicit false from YAML", func() {
falseV := false
cfg := &ModelConfig{
LLMConfig: LLMConfig{PromptCacheAll: &falseV},
}
cfg.SetDefaults()

Expect(cfg.PromptCacheAll).NotTo(BeNil())
Expect(*cfg.PromptCacheAll).To(BeFalse())
})

It("preserves an explicit true from YAML", func() {
trueV := true
cfg := &ModelConfig{
LLMConfig: LLMConfig{PromptCacheAll: &trueV},
}
cfg.SetDefaults()

Expect(cfg.PromptCacheAll).NotTo(BeNil())
Expect(*cfg.PromptCacheAll).To(BeTrue())
})
})
})
9 changes: 8 additions & 1 deletion core/config/model_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ type LLMConfig struct {
RMSNormEps float32 `yaml:"rms_norm_eps,omitempty" json:"rms_norm_eps,omitempty"`
NGQA int32 `yaml:"ngqa,omitempty" json:"ngqa,omitempty"`
PromptCachePath string `yaml:"prompt_cache_path,omitempty" json:"prompt_cache_path,omitempty"`
PromptCacheAll bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
PromptCacheAll *bool `yaml:"prompt_cache_all,omitempty" json:"prompt_cache_all,omitempty"`
PromptCacheRO bool `yaml:"prompt_cache_ro,omitempty" json:"prompt_cache_ro,omitempty"`
MirostatETA *float64 `yaml:"mirostat_eta,omitempty" json:"mirostat_eta,omitempty"`
MirostatTAU *float64 `yaml:"mirostat_tau,omitempty" json:"mirostat_tau,omitempty"`
Expand Down Expand Up @@ -494,6 +494,13 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
cfg.Reranking = &falseV
}

if cfg.PromptCacheAll == nil {
// Match upstream llama.cpp's default (common/common.h: cache_prompt = true)
// and let cache_idle_slots / kv_unified actually do useful work; users can
// opt out with an explicit `prompt_cache_all: false` in the model YAML.
cfg.PromptCacheAll = &trueV
}

if threads == 0 {
// Threads can't be 0
threads = 4
Expand Down
Loading