Skip to content

Commit

Permalink
Merge pull request #3684 from ollama/mxyng/scale-graph
Browse files: browse the repository at this point in the history
scale graph based on gpu count
  • Loading branch information
mxyng committed Apr 16, 2024
2 parents 7c9792a + 26df674 commit fb9580d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
2 changes: 1 addition & 1 deletion gpu/gpu_darwin.go
Expand Up @@ -55,6 +55,6 @@ func getCPUMem() (memInfo, error) {
return memInfo{
TotalMemory: uint64(C.getPhysicalMemory()),
FreeMemory: 0,
- DeviceCount: 0,
+ DeviceCount: 1,
}, nil
}
3 changes: 3 additions & 0 deletions llm/server.go
Expand Up @@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
graphFullOffload = graphPartialOffload
}

+ graphFullOffload *= uint64(info.DeviceCount)
+ graphPartialOffload *= uint64(info.DeviceCount)
+
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload

Expand Down

0 comments on commit fb9580d

Please sign in to comment.