pavanjava
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/.env‎
Lines changed: 3 additions & 0 deletions b/‎bootstraprag/templates/llamaindex/rag_with_flare/.env‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py‎ b/‎bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py‎
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py‎
Lines changed: 0 additions & 5 deletions b/‎bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py‎ b/‎bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py‎
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py‎
Lines changed: 0 additions & 14 deletions b/‎bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py‎
Lines changed: 0 additions & 14 deletions
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/api_server.py‎
Lines changed: 34 additions & 0 deletions b/‎bootstraprag/templates/llamaindex/rag_with_flare/api_server.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/apis.py‎
Lines changed: 0 additions & 110 deletions b/‎bootstraprag/templates/llamaindex/rag_with_flare/apis.py‎
Lines changed: 0 additions & 110 deletions
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py‎ b/‎bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py‎
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py‎
Lines changed: 0 additions & 5 deletions b/‎bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎bootstraprag/templates/llamaindex/rag_with_flare/readme.md‎
Lines changed: 17 additions & 0 deletions b/‎bootstraprag/templates/llamaindex/rag_with_flare/readme.md‎
Lines changed: 17 additions & 0 deletions
@@ -19,3 +19,6 @@ WARN = 30
 INFO = 20
 DEBUG = 10
 NOTSET = 0
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=4
@@ -0,0 +1,34 @@
+from abc import ABC
+from dotenv import load_dotenv, find_dotenv
+from base_rag import BaseRAG
+import litserve as ls
+import os
+
+# Load variables from a discovered .env file before any settings are read
+# from os.environ further down in this module.
+_ = load_dotenv(find_dotenv())
+
+
+class ReactRAGServingAPI(ls.LitAPI, ABC):
+    def __init__(self):
+        self.base_rag = None
+
+    def setup(self, devices):
+        self.base_rag = BaseRAG(show_progress=True, data_path='data')
+
+    def decode_request(self, request, **kwargs):
+        return request["query"]
+
+    def predict(self, query: str):
+        try:
+            return self.base_rag.query(query_string=query)
+        except Exception as e:
+            return e.args[0]
+
+    def encode_response(self, output, **kwargs):
+        return {'response': output}
+
+
+if __name__ == '__main__':
+    api = ReactRAGServingAPI()
+    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
+                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
+    server.run(port=os.environ.get('LIT_SERVER_PORT'))
@@ -6,3 +6,20 @@
 - In the data folder place your data preferably any ".pdf"
 #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running
 - run `python main.py`
+
+### How to expose RAG as API
+- run `python api_server.py`
+- verify that the server is up by opening the auto-generated API documentation in a browser:
+- ReDoc: open `http://localhost:8000/redoc`
+- Swagger UI: open `http://localhost:8000/docs`
+
+### Payload Specification
+
+- Method: POST
+- API: http://localhost:8000/api/v1/chat-completion
+- Body:
+```json
+{
+  "query": "explain mlops architecture"
+}
+```