Skip to content

Commit

Permalink
add a host-side ML API flask app, initially to expose GPU-bound embed…
Browse files Browse the repository at this point in the history
…dings generation; juggle dev-like things around a bit
  • Loading branch information
lmorchard committed Apr 25, 2024
1 parent d1bdc5b commit dfb9ff1
Show file tree
Hide file tree
Showing 15 changed files with 478 additions and 579 deletions.
2 changes: 0 additions & 2 deletions Procfile

This file was deleted.

3 changes: 3 additions & 0 deletions Procfile-dev
@@ -0,0 +1,3 @@
docker_services: docker compose -f compose-dev.yaml up
mlapi_service: ./scripts/start-mlapi-dev.sh
host_notebook: ./scripts/start-host-notebook.sh
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -16,7 +16,7 @@ Finally, install dependencies and start up the mix of docker and host services:
python3 -m venv venv
. ./venv/bin/activate
pip install -r requirements.txt
./scripts/start.sh
./scripts/start-dev.sh
```

Somewhere in those startup messages, you should see a URL for a jupyter notebook. Open that in your browser and have fun!
Expand Down
20 changes: 12 additions & 8 deletions compose.yaml → compose-dev.yaml
@@ -1,7 +1,5 @@
version: '3'



services:
db:
image: pgvector/pgvector:pg16
Expand All @@ -23,10 +21,10 @@ services:
timeout: 5s
retries: 5

# embeddings: CPU-bound torchserve container for the embeddings model,
# disabled in favor of the host-side mlapi service (which runs outside
# docker for GPU access); kept commented for reference.
#embeddings:
#  image: ghcr.io/clems4ever/torchserve-all-minilm-l6-v2:latest
#  ports:
#    - 8674:8080

bot:
build:
Expand All @@ -36,11 +34,14 @@ services:
volumes:
- ./:/app
- ./log:/log
# mlapi will run on host for GPU access, so we need this gateway
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
  MASTODON_AGENT_LOG_LEVEL: DEBUG
  # Quoted: a bare True is a YAML boolean, which the compose spec does not
  # allow for environment values; the app still receives the string "True".
  MASTODON_AGENT_DEBUG: "True"
  MASTODON_AGENT_DATABASE_URL: &database-url "postgresql://postgres:8675309jenny@db:5432/example"
  # NOTE(review): still points at the torchserve `embeddings` container that
  # this commit disables — confirm whether it can be dropped in favor of
  # MASTODON_AGENT_ML_API_URL.
  MASTODON_AGENT_EMBEDDINGS_URL: &embeddings-url "http://embeddings:8080/predictions/my_model"
  MASTODON_AGENT_ML_API_URL: &mlapi-url "http://host.docker.internal:8673"
depends_on:
db:
condition: service_healthy
Expand All @@ -55,11 +56,14 @@ services:
volumes:
- ./:/app
- ./log:/log
# mlapi will run on host for GPU access, so we need this gateway
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
  MASTODON_AGENT_LOG_LEVEL: DEBUG
  # Quoted: a bare True is a YAML boolean, which the compose spec does not
  # allow for environment values; the app still receives the string "True".
  MASTODON_AGENT_DEBUG: "True"
  MASTODON_AGENT_DATABASE_URL: *database-url
  MASTODON_AGENT_EMBEDDINGS_URL: *embeddings-url
  MASTODON_AGENT_ML_API_URL: *mlapi-url
depends_on:
db:
condition: service_healthy
Expand Down
4 changes: 2 additions & 2 deletions docker/app_base
Expand Up @@ -44,14 +44,14 @@ EXPOSE ${PORT}

# Development web app server: Flask development mode, started via script.
FROM base as appserver_dev
ENV FLASK_ENV=development
# Exec form so the script runs as PID 1 and receives SIGTERM directly
# instead of being wrapped in `sh -c` (shell form swallows signals).
# NOTE(review): requires the script to be executable with a shebang — confirm.
ENTRYPOINT ["/app/scripts/start-webapp-dev.sh"]

# --------------------------------------------------------------

# Production web app server: sources baked into the image, production Flask env.
FROM base as appserver_prod
ENV FLASK_ENV=production
COPY . .
# Exec form so the script runs as PID 1 and receives SIGTERM directly
# instead of being wrapped in `sh -c` (shell form swallows signals).
# NOTE(review): requires the script to be executable with a shebang — confirm.
ENTRYPOINT ["/app/scripts/start-webapp.sh"]

# --------------------------------------------------------------

Expand Down
5 changes: 5 additions & 0 deletions mastodon_agent/config.py
Expand Up @@ -20,10 +20,15 @@ class Config:
client_secret: str = ""
access_token: str = ""

# hugging face API token
hf_token: str = ""

# CPU-bound torchserve docker container for embeddings model
embeddings_api_url: str = ""

# in-house proxy API to ML services
ml_api_url: str = ""

user_agent: str = "PubPulse 0.1"
debug_requests: bool = False

Expand Down
30 changes: 30 additions & 0 deletions mastodon_agent/mlapi.py
@@ -0,0 +1,30 @@
import json

from flask import Flask, request, jsonify
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate

from sentence_transformers import SentenceTransformer

from .config import config
from .log import logger
from .models import db


# Host-side ML API app: exposes GPU-bound model work to the dockerized
# services (which reach it via host.docker.internal).
app = Flask(__name__)
# Load the sentence-embedding model once at import time so every request
# reuses it. NOTE(review): presumably selects a GPU automatically when one
# is available — confirm against the deployment environment.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


@app.route('/')
def root():
    # Minimal liveness endpoint: confirms the ML API process is reachable.
    greeting = 'Hello'
    return greeting


@app.route('/embeddings', methods=['POST'])
def embedding():
    """Generate sentence embeddings for a list of text chunks.

    Expects a JSON body of the form ``{"inputs": ["text", ...]}`` and
    returns a JSON array with one embedding vector (list of floats) per
    chunk. Returns a JSON 400 error when the body is missing, is not
    valid JSON, or lacks a list under "inputs" (previously these cases
    raised TypeError/KeyError and surfaced as unhandled 500s).
    """
    # silent=True yields None on a bad/missing body instead of Flask's
    # default 400 HTML page, so we can answer with consistent JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not isinstance(data.get('inputs'), list):
        return jsonify({'error': 'expected JSON body {"inputs": [...]}'}), 400
    chunks = data['inputs']
    logger.info("Received embeddings request with %d chunks", len(chunks))
    embeddings = embedding_model.encode(chunks)
    return jsonify(embeddings.tolist())

0 comments on commit dfb9ff1

Please sign in to comment.