Skip to content

Commit

Permalink
Added database to inference server (#1446)
Browse files Browse the repository at this point in the history
* added db for inference

* fixed dockerfiles for inference
  • Loading branch information
yk committed Feb 10, 2023
1 parent 911fc2a commit 90c3d56
Show file tree
Hide file tree
Showing 21 changed files with 627 additions and 192 deletions.
53 changes: 30 additions & 23 deletions docker-compose.yaml
Expand Up @@ -136,6 +136,23 @@ services:
- "3000:3000"
command: bash wait-for-postgres.sh node server.js

# Dedicated Postgres instance for the inference server.
# Published on host port 5434; the container itself listens on 5432.
inference-db:
  image: postgres
  restart: always
  ports:
    # Quoted like the other port mappings in this file so the value is
    # always read as a string.
    - "5434:5432"
  environment:
    POSTGRES_USER: postgres
    POSTGRES_PASSWORD: postgres
    POSTGRES_DB: oasst_inference
  healthcheck:
    # Gates services that depend on this one with
    # `condition: service_healthy`.
    test: ["CMD", "pg_isready", "-U", "postgres"]
    interval: 2s
    timeout: 2s
    retries: 10
  profiles: ["inference"]

inference-server:
build:
dockerfile: docker/inference/Dockerfile.server
Expand All @@ -145,13 +162,25 @@ services:
environment:
- "PORT=8000"
- "REDIS_HOST=redis"
- POSTGRES_HOST=inference-db
- POSTGRES_DB=oasst_inference
volumes:
- "./oasst-shared:/opt/inference/lib/oasst-shared"
- "./inference/server:/opt/inference/server"
restart: unless-stopped
ports:
- "8000:8000"
depends_on:
redis:
condition: service_healthy
inference-db:
condition: service_healthy
profiles: ["inference"]

# Text-generation backend, run from the prebuilt Hugging Face image.
# MODEL_ID is handed to the container as an environment variable.
inference-text-generation-server:
  image: ghcr.io/huggingface/text-generation-inference
  environment:
    MODEL_ID: distilgpt2
  profiles:
    - inference

inference-worker:
Expand All @@ -167,29 +196,7 @@ services:
- "./oasst-shared:/opt/inference/lib/oasst-shared"
- "./inference/worker:/opt/inference/worker"
depends_on:
- inference-server
- inference-text-generation-server
deploy:
replicas: 1
profiles: ["inference"]

inference-text-client:
build:
dockerfile: docker/inference/Dockerfile.text-client
context: .
image: oasst-inference-text-client
environment:
- "BACKEND_URL=http://inference-server:8000"
tty: true
stdin_open: true
volumes:
- "./inference/worker:/opt/inference/worker"
restart: unless-stopped
depends_on:
- inference-server
profiles: ["inference"]

inference-text-generation-server:
image: ghcr.io/huggingface/text-generation-inference
environment:
- "MODEL_ID=distilgpt2"
profiles: ["inference"]
7 changes: 5 additions & 2 deletions docker/inference/Dockerfile.server
Expand Up @@ -7,7 +7,7 @@ ARG APP_USER="${MODULE}-${SERVICE}"
ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"


FROM python:3-slim as build
FROM python:3.10-slim as build
ARG APP_RELATIVE_PATH

WORKDIR /build
Expand All @@ -22,7 +22,7 @@ RUN --mount=type=cache,target=/var/cache/pip \



FROM python:3.10-alpine3.17 as base-env
FROM python:3.10-slim as base-env
ARG APP_USER
ARG APP_RELATIVE_PATH
ARG MODULE
Expand Down Expand Up @@ -50,6 +50,9 @@ WORKDIR ${APP_ROOT}


COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/alembic alembic
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/alembic.ini .
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/oasst_inference_server oasst_inference_server
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py .


Expand Down
50 changes: 0 additions & 50 deletions docker/inference/Dockerfile.text-client

This file was deleted.

2 changes: 1 addition & 1 deletion docker/inference/Dockerfile.worker
Expand Up @@ -48,7 +48,7 @@ WORKDIR ${APP_ROOT}


COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/*.py .


CMD python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}"
Expand Down
6 changes: 4 additions & 2 deletions inference/full-dev-setup.sh
Expand Up @@ -3,9 +3,11 @@
# Creates a tmux window with splits for the individual services

tmux new-session -d -s "inference-dev-setup"
tmux send-keys "docker run --rm -it -p 6379:6379 redis" C-m
tmux send-keys "docker run --rm -it -p 5432:5432 -e POSTGRES_PASSWORD=postgres --name postgres postgres" C-m
tmux split-window -h
tmux send-keys "docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 ghcr.io/huggingface/text-generation-inference" C-m
tmux send-keys "docker run --rm -it -p 6379:6379 --name redis redis" C-m
tmux split-window -h
tmux send-keys "docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 -v $HOME/.cache/huggingface:/root/.cache/huggingface --name text-generation-inference ghcr.io/huggingface/text-generation-inference" C-m
tmux split-window -h
tmux send-keys "cd server" C-m
tmux send-keys "uvicorn main:app --reload" C-m
Expand Down
105 changes: 105 additions & 0 deletions inference/server/alembic.ini
@@ -0,0 +1,105 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = %(here)s/alembic

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# The file_template setting below is active, so migration file names are prefixed with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python-dateutil library that can be
# installed by adding `alembic[tz]` to the pip requirements
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# sqlalchemy.url = postgresql://<username>:<password>@<host>/<database_name>
sqlalchemy.url = postgresql://postgres:postgres@localhost:5432/postgres

[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
hooks = black
black.type = console_scripts
black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
1 change: 1 addition & 0 deletions inference/server/alembic/README
@@ -0,0 +1 @@
Generic single-database configuration.
78 changes: 78 additions & 0 deletions inference/server/alembic/env.py
@@ -0,0 +1,78 @@
from logging.config import fileConfig

import sqlmodel
from alembic import context
from oasst_inference_server import models # noqa: F401
from sqlalchemy import engine_from_config, pool

# Alembic Config object; gives access to the values of the .ini file in use.
config = context.config

# MetaData object handed to Alembic for 'autogenerate' support.
target_metadata = sqlmodel.SQLModel.metadata

# Set up Python logging from the .ini file, when one was supplied.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    Only the ``sqlalchemy.url`` option is used to configure the context; no
    Engine is created, so a DBAPI does not have to be available. Calls to
    ``context.execute()`` emit the given string to the script output.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.*`` options of the active ini
    section, opens a connection, and runs the migrations inside a single
    transaction on that connection.

    Before migrating, the ``alembic_version`` table is locked in
    ACCESS EXCLUSIVE mode. The lock is held until the transaction ends, so
    another session trying to take the same lock has to wait.
    NOTE(review): presumably this serializes migration runs from several
    processes starting at once -- confirm against the deployment setup.
    """
    # Local import keeps this block self-contained; sqlalchemy is already a
    # dependency of this module.
    from sqlalchemy import text

    connectable = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix="sqlalchemy.",
        # NullPool: connections are not pooled or reused by this engine.
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            # Private Alembic API. Called first so the version table is
            # there to be locked (assumed from its name -- TODO confirm).
            context.get_context()._ensure_version_table()
            # Wrapped in text(): passing a raw SQL string to execute() is
            # deprecated in SQLAlchemy 1.4 and rejected in 2.0.
            connection.execute(
                text("LOCK TABLE alembic_version IN ACCESS EXCLUSIVE MODE")
            )
            context.run_migrations()


# Pick the runner matching the mode Alembic reports, then invoke it.
(run_migrations_offline if context.is_offline_mode() else run_migrations_online)()
25 changes: 25 additions & 0 deletions inference/server/alembic/script.py.mako
@@ -0,0 +1,25 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade() -> None:
${upgrades if upgrades else "pass"}


def downgrade() -> None:
${downgrades if downgrades else "pass"}
Empty file.

0 comments on commit 90c3d56

Please sign in to comment.