Skip to content

Commit

Permalink
add a host-side ML API flask app, initially to expose GPU-bound embed…
Browse files Browse the repository at this point in the history
…dings generation; juggle dev-like things around a bit
  • Loading branch information
lmorchard committed Apr 25, 2024
1 parent d1bdc5b commit dfb9ff1
Show file tree
Hide file tree
Showing 15 changed files with 478 additions and 579 deletions.
2 changes: 0 additions & 2 deletions Procfile

This file was deleted.

3 changes: 3 additions & 0 deletions Procfile-dev
@@ -0,0 +1,3 @@
docker_services: docker compose -f compose-dev.yaml up
mlapi_service: ./scripts/start-mlapi-dev.sh
host_notebook: ./scripts/start-host-notebook.sh
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -16,7 +16,7 @@ Finally, install dependencies and start up the mix of docker and host services:
python3 -m venv venv
. ./venv/bin/activate
pip install -r requirements.txt
./scripts/start.sh
./scripts/start-dev.sh
```

Somewhere in those startup messages, you should see a URL for a jupyter notebook. Open that in your browser and have fun!
Expand Down
20 changes: 12 additions & 8 deletions compose.yaml → compose-dev.yaml
@@ -1,7 +1,5 @@
version: '3'



services:
db:
image: pgvector/pgvector:pg16
Expand All @@ -23,10 +21,10 @@ services:
timeout: 5s
retries: 5

# embeddings: CPU-bound torchserve container for the embeddings model,
# disabled in favor of the host-side mlapi service (which runs outside
# docker for GPU access); kept commented for reference.
#embeddings:
#  image: ghcr.io/clems4ever/torchserve-all-minilm-l6-v2:latest
#  ports:
#    - 8674:8080

bot:
build:
Expand All @@ -36,11 +34,14 @@ services:
volumes:
- ./:/app
- ./log:/log
# mlapi will run on host for GPU access, so we need this gateway
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
  MASTODON_AGENT_LOG_LEVEL: DEBUG
  # Quoted: a bare True is a YAML boolean, which the compose spec does not
  # allow for environment values; the app still receives the string "True".
  MASTODON_AGENT_DEBUG: "True"
  MASTODON_AGENT_DATABASE_URL: &database-url "postgresql://postgres:8675309jenny@db:5432/example"
  # NOTE(review): still points at the torchserve `embeddings` container that
  # this commit disables — confirm whether it can be dropped in favor of
  # MASTODON_AGENT_ML_API_URL.
  MASTODON_AGENT_EMBEDDINGS_URL: &embeddings-url "http://embeddings:8080/predictions/my_model"
  MASTODON_AGENT_ML_API_URL: &mlapi-url "http://host.docker.internal:8673"
depends_on:
db:
condition: service_healthy
Expand All @@ -55,11 +56,14 @@ services:
volumes:
- ./:/app
- ./log:/log
# mlapi will run on host for GPU access, so we need this gateway
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
  MASTODON_AGENT_LOG_LEVEL: DEBUG
  # Quoted: a bare True is a YAML boolean, which the compose spec does not
  # allow for environment values; the app still receives the string "True".
  MASTODON_AGENT_DEBUG: "True"
  MASTODON_AGENT_DATABASE_URL: *database-url
  MASTODON_AGENT_EMBEDDINGS_URL: *embeddings-url
  MASTODON_AGENT_ML_API_URL: *mlapi-url
depends_on:
db:
condition: service_healthy
Expand Down
4 changes: 2 additions & 2 deletions docker/app_base
Expand Up @@ -44,14 +44,14 @@ EXPOSE ${PORT}

# Development web app server: Flask development mode, started via script.
FROM base as appserver_dev
ENV FLASK_ENV=development
# Exec form so the script runs as PID 1 and receives SIGTERM directly
# instead of being wrapped in `sh -c` (shell form swallows signals).
# NOTE(review): requires the script to be executable with a shebang — confirm.
ENTRYPOINT ["/app/scripts/start-webapp-dev.sh"]

# --------------------------------------------------------------

# Production web app server: sources baked into the image, production Flask env.
FROM base as appserver_prod
ENV FLASK_ENV=production
COPY . .
# Exec form so the script runs as PID 1 and receives SIGTERM directly
# instead of being wrapped in `sh -c` (shell form swallows signals).
# NOTE(review): requires the script to be executable with a shebang — confirm.
ENTRYPOINT ["/app/scripts/start-webapp.sh"]

# --------------------------------------------------------------

Expand Down
5 changes: 5 additions & 0 deletions mastodon_agent/config.py
Expand Up @@ -20,10 +20,15 @@ class Config:
client_secret: str = ""
access_token: str = ""

# hugging face API token
hf_token: str = ""

# CPU-bound torchserve docker container for embeddings model
embeddings_api_url: str = ""

# in-house proxy API to ML services
ml_api_url: str = ""

user_agent: str = "PubPulse 0.1"
debug_requests: bool = False

Expand Down
30 changes: 30 additions & 0 deletions mastodon_agent/mlapi.py
@@ -0,0 +1,30 @@
import json

from flask import Flask, request, jsonify
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate

from sentence_transformers import SentenceTransformer

from .config import config
from .log import logger
from .models import db


# Host-side ML API app: exposes GPU-bound model work to the dockerized
# services (which reach it via host.docker.internal).
app = Flask(__name__)
# Load the sentence-embedding model once at import time so every request
# reuses it. NOTE(review): presumably selects a GPU automatically when one
# is available — confirm against the deployment environment.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


@app.route('/')
def root():
    # Minimal liveness endpoint: confirms the ML API process is reachable.
    greeting = 'Hello'
    return greeting


@app.route('/embeddings', methods=['POST'])
def embedding():
    """Generate sentence embeddings for a list of text chunks.

    Expects a JSON body of the form ``{"inputs": ["text", ...]}`` and
    returns a JSON array with one embedding vector (list of floats) per
    chunk. Returns a JSON 400 error when the body is missing, is not
    valid JSON, or lacks a list under "inputs" (previously these cases
    raised TypeError/KeyError and surfaced as unhandled 500s).
    """
    # silent=True yields None on a bad/missing body instead of Flask's
    # default 400 HTML page, so we can answer with consistent JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not isinstance(data.get('inputs'), list):
        return jsonify({'error': 'expected JSON body {"inputs": [...]}'}), 400
    chunks = data['inputs']
    logger.info("Received embeddings request with %d chunks", len(chunks))
    embeddings = embedding_model.encode(chunks)
    return jsonify(embeddings.tolist())

0 comments on commit dfb9ff1

Please sign in to comment.