From 4142c3ef7c4c543bf9735cdddb99d4570071c5bd Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Fri, 10 May 2024 13:53:21 -0700
Subject: [PATCH] Always use the sorted list of GPUs

Make sure the first GPU has the most free space
---
 server/sched.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/server/sched.go b/server/sched.go
index bbf333d7da..eff2b1177d 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -567,9 +567,9 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
 		// - try subsets of GPUs instead of just falling back to 1 or all in a family
 
 		// Now try all the GPUs
-		if ok, estimatedVRAM = llm.PredictServerFit(gl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
-			slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", gl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
-			return gl
+		if ok, estimatedVRAM = llm.PredictServerFit(sgl, ggml, req.model.AdapterPaths, req.model.ProjectorPaths, req.opts); ok {
+			slog.Debug("new model will fit in available VRAM, loading", "model", req.model.ModelPath, "library", sgl[0].Library, "required", format.HumanBytes2(estimatedVRAM))
+			return sgl
 		}
 	}
 	return nil