Improve production deployment
medihack committed Jun 11, 2024
1 parent c98e156 commit 485939f
Showing 3 changed files with 60 additions and 42 deletions.
10 changes: 8 additions & 2 deletions compose/docker-compose.base.yml
@@ -23,15 +23,21 @@ x-app: &default-app
    FLOWER_PORT: "5555"

services:
  init:
    <<: *default-app
    hostname: init.local
    volumes:
      - radis_data:/var/www/radis
      - /mnt:/mnt

  web:
    <<: *default-app
    hostname: web.local
    build:
      context: ..
    volumes:
      # Cave, overwrites the above anchor
      - /mnt:/mnt
      - radis_data:/var/www/radis
      - /mnt:/mnt

  worker_default:
    <<: *default-app
4 changes: 2 additions & 2 deletions compose/docker-compose.dev.yml
@@ -13,10 +13,10 @@ x-llamacpp: &llamacpp
  env_file:
    - ../.env.dev
  hostname: llamacpp.local
  volumes:
    - models_data:/models
  ports:
    - 9610:8080
  volumes:
    - models_data:/models
  entrypoint: "/bin/bash -c '/server -mu $${LLM_MODEL_URL} -c 512 --host 0.0.0.0 --port 8080'"

services:
88 changes: 50 additions & 38 deletions compose/docker-compose.prod.yml
@@ -1,5 +1,4 @@
x-app: &default-app
  restart: always
  image: radis_prod:latest
  env_file:
    - ../.env.prod
@@ -9,89 +8,107 @@ x-app: &default-app
    SSL_CERT_FILE: "/var/www/radis/ssl/cert.pem"
    SSL_KEY_FILE: "/var/www/radis/ssl/key.pem"

x-deploy: &deploy
  replicas: 1
  restart_policy:
    condition: on-failure
    max_attempts: 3

services:
  web:
  # We can't run these manage commands inside the web containers in production because
  # the web service may have multiple replicas. So we run them exactly once in this init
  # service and have the web service containers wait for it to finish.
  init:
    <<: *default-app
    build:
      target: production
    ports:
      - "${RADIS_HTTP_PORT:-80}:80"
      - "${RADIS_HTTPS_PORT:-443}:443"
    hostname: init.local
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t 60 &&
        wait-for-it -s postgres.local:5432 -t 120 &&
        ./manage.py migrate &&
        ./manage.py collectstatic --no-input &&
        ./manage.py create_admin &&
        ./manage.py generate_cert &&
        wait-for-it -s opensearch-node1.local:9200 -t 60 &&
        ./manage.py setup_opensearch &&
        ./manage.py opensearch --mappings prod &&
        # wait-for-it -s vespa.local:19071 -t 60 &&
        # ./manage.py vespa --generate --deploy &&
        ./manage.py ok_server --host 0.0.0.0 --port 8000
      "
    deploy:
      <<: *deploy

  web:
    <<: *default-app
    build:
      target: production
    ports:
      - "${RADIS_HTTP_PORT:-80}:80"
      - "${RADIS_HTTPS_PORT:-443}:443"
    command: >
      bash -c "
        wait-for-it -s init.local:8000 -t 300 &&
        echo 'Starting web server ...'
        daphne -b 0.0.0.0 -p 80 -e ssl:443:privateKey=/var/www/radis/ssl/key.pem:certKey=/var/www/radis/ssl/cert.pem radis.asgi:application
      "
    deploy:
      replicas: 1
      <<: *deploy
      replicas: 3

  worker_default:
    <<: *default-app
    command: ./manage.py celery_worker -Q default_queue
    deploy:
      replicas: 1
      <<: *deploy

  worker_vespa:
    <<: *default-app
    command: ./manage.py celery_worker -c 1 -Q vespa_queue
    deploy:
      replicas: 0
      <<: *deploy

  worker_llm:
    <<: *default-app
    command: ./manage.py celery_worker -c 1 -Q llm_queue
    deploy:
      replicas: 1
      <<: *deploy

  celery_beat:
    <<: *default-app
    command: ./manage.py celery_beat
    deploy:
      replicas: 1
      <<: *deploy

  flower:
    <<: *default-app
    deploy:
      replicas: 1
      <<: *deploy

  llamacpp-gpu:
    restart: always
    image: ghcr.io/ggerganov/llama.cpp:server-cuda
    environment:
      MODEL_URL: https://huggingface.co/MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/Mistral-7B-Instruct-v0.3.Q5_K_M.gguf
    hostname: llamacpp.local
    env_file:
      - ../.env.prod
    ports:
      - 9610:8080
    volumes:
      - models_data:/models
    command: "-mu $${MODEL_URL} -m /models/model.gguf -cb -c 2048 --host 0.0.0.0 --port 8080"
    entrypoint: "/bin/bash -c '/server -mu $${LLM_MODEL_URL} -cb -c 2048 --host 0.0.0.0 --port 8080'"
    deploy:
      replicas: 1
      # <<: *deploy
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
          generic_resources:
            - discrete_resource_spec:
                kind: "gpu"
                value: 1

  postgres:
    restart: always
    env_file:
      - ../.env.prod
    deploy:
      replicas: 1
      <<: *deploy

  opensearch-node1:
    restart: always
    image: opensearchproject/opensearch:2
    hostname: opensearch-node1.local
    environment:
@@ -116,10 +133,9 @@ services:
      - 9200:9200 # REST API
      - 9600:9600 # Performance Analyzer
    deploy:
      replicas: 1
      <<: *deploy

  opensearch-node2:
    restart: always
    image: opensearchproject/opensearch:2
    hostname: opensearch-node2.local
    environment:
@@ -141,10 +157,9 @@ services:
    volumes:
      - opensearch-data2:/usr/share/opensearch/data
    deploy:
      replicas: 1
      <<: *deploy

  opensearch-node3:
    restart: always
    image: opensearchproject/opensearch:2
    hostname: opensearch-node3.local
    environment:
@@ -166,7 +181,7 @@ services:
    volumes:
      - opensearch-data2:/usr/share/opensearch/data
    deploy:
      replicas: 1
      <<: *deploy

  opensearch-dashboards:
    image: opensearchproject/opensearch-dashboards:2
@@ -175,10 +190,9 @@ services:
    environment:
      OPENSEARCH_HOSTS: '["https://opensearch-node1:9200","https://opensearch-node2:9200","https://opensearch-node3:9200"]'
    deploy:
      replicas: 1
      <<: *deploy

  vespa:
    restart: always
    image: vespaengine/vespa:8
    hostname: vespa.local
    healthcheck:
@@ -195,14 +209,12 @@ services:
      replicas: 0

  rabbit:
    restart: always
    deploy:
      replicas: 1
      <<: *deploy

  redis:
    restart: always
    deploy:
      replicas: 1
      <<: *deploy

volumes:
  models_data:

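A note on the init pattern introduced in docker-compose.prod.yml above: because the web service can now run with multiple replicas, the one-off manage commands (migrations, collectstatic, admin and certificate creation, OpenSearch setup) are moved into a single-replica init service. Once those commands finish, init serves a trivial OK endpoint, and every web replica waits for that endpoint before starting daphne. Below is a condensed sketch of that pattern, not part of the commit; it only reuses names that appear in the diff (radis_prod, init.local, ok_server, wait-for-it, daphne, radis.asgi) and omits everything else.

# Condensed illustration of the init-gating pattern (sketch, not part of the commit).
services:
  init:
    image: radis_prod:latest
    hostname: init.local
    command: >
      bash -c "
        ./manage.py migrate &&
        ./manage.py ok_server --host 0.0.0.0 --port 8000
      "
    deploy:
      replicas: 1              # must stay at one replica so setup runs exactly once

  web:
    image: radis_prod:latest
    command: >
      bash -c "
        wait-for-it -s init.local:8000 -t 300 &&
        daphne -b 0.0.0.0 -p 80 radis.asgi:application
      "
    deploy:
      replicas: 3              # safe to scale out; setup has already completed

The commit also factors common deploy settings into the x-deploy anchor; a service that needs a different replica count, such as web, merges the anchor with <<: *deploy and then sets replicas explicitly, and an explicitly listed key takes precedence over one pulled in by the merge.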