3 changes: 3 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/.env
@@ -19,3 +19,6 @@ WARN = 30
INFO = 20
DEBUG = 10
NOTSET = 0

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
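These settings are consumed at startup by the new `api_server.py`. A minimal sketch of how they are read, assuming python-dotenv is available and using the template defaults above as fallbacks:

```python
import os

from dotenv import load_dotenv, find_dotenv

# Load variables from the nearest .env file into the process environment
load_dotenv(find_dotenv())

# Fall back to the template defaults when the variables are unset
port = int(os.environ.get('LIT_SERVER_PORT', '8000'))
workers = int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4'))
```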
34 changes: 34 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/api_server.py
@@ -0,0 +1,34 @@
from abc import ABC
import os

from dotenv import load_dotenv, find_dotenv
import litserve as ls

from base_rag import BaseRAG

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    def __init__(self):
        super().__init__()
        self.base_rag = None

    def setup(self, devices):
        # Build the RAG pipeline once per worker, indexing documents under ./data
        self.base_rag = BaseRAG(show_progress=True, data_path='data')

    def decode_request(self, request, **kwargs):
        # The request body is expected to be {"query": "..."}
        return request["query"]

    def predict(self, query: str):
        try:
            return self.base_rag.query(query_string=query)
        except Exception as e:
            # Return the error message as the response payload instead of raising
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults mirror the values shipped in .env (4 workers, port 8000)
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
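One design note on `predict`: returning the exception message on failure means errors come back inside the normal `{'response': ...}` envelope with a 200 status rather than as HTTP errors; clients that need to distinguish failures may prefer re-raising so LitServe can surface a proper error response.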
110 changes: 0 additions & 110 deletions bootstraprag/templates/llamaindex/rag_with_flare/apis.py

This file was deleted.

17 changes: 17 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/readme.md
@@ -6,3 +6,20 @@
- Place your data files (preferably `.pdf`) in the `data` folder
#### Note: ensure Qdrant and Ollama (if your LLM models point to a local instance) are running
- run `python main.py`

### How to expose the RAG pipeline as an API
- run `python api_server.py`
- verify the Swagger and ReDoc documentation as below:
- open a browser and visit `http://localhost:8000/redoc`
- open a browser and visit `http://localhost:8000/docs`

### Payload Specification

- Method: POST
- Endpoint: `http://localhost:8000/api/v1/chat-completion`
- Body:
```json
{
"query": "explain mlops architecture"
}
```
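A quick way to exercise the endpoint from Python, sketched with the `requests` library (assumed to be installed; the URL and payload are taken from the spec above):

```python
import requests

# POST the query payload to the chat-completion endpoint
resp = requests.post(
    'http://localhost:8000/api/v1/chat-completion',
    json={'query': 'explain mlops architecture'},
    timeout=120,  # the first query may be slow while the index warms up
)
resp.raise_for_status()
print(resp.json()['response'])  # the server wraps output as {"response": ...}
```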
1 change: 1 addition & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt
@@ -7,3 +7,4 @@ llama-index-embeddings-openai==0.1.11
llama-index-embeddings-ollama==0.1.2
llama-index-vector-stores-qdrant==0.2.14
pydantic==2.9.0
litserve==0.2.2
3 changes: 3 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_hyde/.env
@@ -19,3 +19,6 @@ WARN = 30
INFO = 20
DEBUG = 10
NOTSET = 0

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
14 changes: 0 additions & 14 deletions bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py

This file was deleted.

34 changes: 34 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py
@@ -0,0 +1,34 @@
from abc import ABC
import os

from dotenv import load_dotenv, find_dotenv
import litserve as ls

from base_rag import BaseRAG

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    def __init__(self):
        super().__init__()
        self.base_rag = None

    def setup(self, devices):
        # Build the RAG pipeline once per worker, indexing documents under ./data
        self.base_rag = BaseRAG(show_progress=True, data_path='data')

    def decode_request(self, request, **kwargs):
        # The request body is expected to be {"query": "..."}
        return request["query"]

    def predict(self, query: str):
        try:
            return self.base_rag.query(query_string=query)
        except Exception as e:
            # Return the error message as the response payload instead of raising
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults mirror the values shipped in .env (4 workers, port 8000)
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
110 changes: 0 additions & 110 deletions bootstraprag/templates/llamaindex/rag_with_hyde/apis.py

This file was deleted.
