Also load large LLMs fully on GPU
medihack committed Jun 18, 2024
1 parent 417857a commit d989d10
Showing 2 changed files with 2 additions and 2 deletions.
compose/docker-compose.dev.yml (1 addition, 1 deletion)
@@ -89,7 +89,7 @@ services:
   llamacpp_gpu:
     <<: *llamacpp
     image: ghcr.io/ggerganov/llama.cpp:server-cuda
-    entrypoint: "/bin/bash -c '/llama-server -mu $${LLM_MODEL_URL} -ngl 50 -c 4096 --host 0.0.0.0 --port 8080'"
+    entrypoint: "/bin/bash -c '/llama-server -mu $${LLM_MODEL_URL} -ngl 99 -c 4096 --host 0.0.0.0 --port 8080'"
     deploy:
       resources:
         reservations:
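The whole commit is this one-flag change: `-ngl` (`--n-gpu-layers`) tells llama-server how many model layers to offload to the GPU. Larger LLMs have more than 50 layers, so `-ngl 50` left part of the model running on the CPU; 99 exceeds the layer count of the models in question, so llama.cpp clamps it to the model's actual layer count and offloads everything, which is what the commit title means by loading large LLMs fully on GPU. The reservations block beneath the change is collapsed in this view; as a rough sketch only, a typical GPU reservation for such a service looks like the following (the driver and count values are assumptions, not the repository's actual settings):

    # Illustrative only: standard Compose syntax for reserving an NVIDIA GPU.
    # The repository's real reservations block is collapsed in the diff above.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia       # assumes the NVIDIA container toolkit is installed
              count: 1             # number of GPUs to reserve (assumed value)
              capabilities: [gpu]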
compose/docker-compose.prod.yml (1 addition, 1 deletion)
@@ -94,7 +94,7 @@ services:
       - 9610:8080
     volumes:
       - models_data:/models
-    entrypoint: "/bin/bash -c '/llama-server -mu $${LLM_MODEL_URL} -ngl 50 -cb -c 4096 --host 0.0.0.0 --port 8080'"
+    entrypoint: "/bin/bash -c '/llama-server -mu $${LLM_MODEL_URL} -ngl 99 -cb -c 4096 --host 0.0.0.0 --port 8080'"
     deploy:
       # <<: *deploy
       resources:
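The production entrypoint differs from the dev one only in the extra `-cb` flag, which enables continuous batching so concurrent requests are scheduled together on the GPU. A quick way to sanity-check the change after deploying, sketched under the assumption that the prod service keeps the `llamacpp_gpu` name from the dev file and that the host port is the 9610:8080 mapping shown above:

    # Readiness check against llama.cpp's built-in /health endpoint
    # (host port 9610 taken from the mapping in the prod compose file).
    curl -s http://localhost:9610/health

    # Confirm full offload: llama.cpp logs "offloaded N/N layers to GPU"
    # when -ngl covers every layer. The service name here is an assumption.
    docker compose -f compose/docker-compose.prod.yml logs llamacpp_gpu | grep -i offloaded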
