From 545b30126a579bc5f89c0692593ee9cb08a2efde Mon Sep 17 00:00:00 2001 From: pavanmantha Date: Sat, 28 Sep 2024 18:46:39 +0530 Subject: [PATCH 1/3] -modified fastapi to litserve --- .../templates/llamaindex/rag_with_flare/.env | 3 + .../rag_with_flare/api_core/__init__.py | 0 .../rag_with_flare/api_core/config.py | 5 - .../rag_with_flare/api_routes/__init__.py | 0 .../rag_with_flare/api_routes/apis.py | 14 --- .../llamaindex/rag_with_flare/api_server.py | 34 ++++++ .../llamaindex/rag_with_flare/apis.py | 110 ------------------ .../rag_with_flare/models/__init__.py | 0 .../rag_with_flare/models/payload.py | 5 - .../llamaindex/rag_with_flare/readme.md | 17 +++ .../rag_with_flare/requirements.txt | 1 + .../templates/llamaindex/rag_with_hyde/.env | 3 + .../rag_with_hyde/api_core/__init__.py | 0 .../rag_with_hyde/api_core/config.py | 5 - .../rag_with_hyde/api_routes/__init__.py | 0 .../rag_with_hyde/api_routes/apis.py | 14 --- .../llamaindex/rag_with_hyde/api_server.py | 34 ++++++ .../llamaindex/rag_with_hyde/apis.py | 110 ------------------ .../rag_with_hyde/models/__init__.py | 0 .../rag_with_hyde/models/payload.py | 5 - .../llamaindex/rag_with_hyde/readme.md | 17 +++ .../llamaindex/rag_with_hyde/requirements.txt | 3 +- .../rag_with_hyde_with_observability/.env | 3 + .../api_core/__init__.py | 0 .../api_core/config.py | 5 - .../api_routes/__init__.py | 0 .../api_routes/apis.py | 14 --- .../api_server.py | 34 ++++++ .../rag_with_hyde_with_observability/apis.py | 110 ------------------ .../models/__init__.py | 0 .../models/payload.py | 5 - .../readme.md | 17 +++ .../requirements.txt | 3 +- .../templates/llamaindex/rag_with_react/.env | 3 + .../rag_with_react/api_core/__init__.py | 0 .../rag_with_react/api_core/config.py | 5 - .../rag_with_react/api_routes/__init__.py | 0 .../rag_with_react/api_routes/apis.py | 14 --- .../llamaindex/rag_with_react/api_server.py | 34 ++++++ .../llamaindex/rag_with_react/apis.py | 110 ------------------ 
.../rag_with_react/models/__init__.py | 0 .../rag_with_react/models/payload.py | 5 - .../llamaindex/rag_with_react/readme.md | 17 +++ .../rag_with_react/requirements.txt | 5 +- .../rag_with_react_with_observability/.env | 3 + .../api_core/__init__.py | 0 .../api_core/config.py | 5 - .../api_routes/__init__.py | 0 .../api_routes/apis.py | 14 --- .../api_server.py | 34 ++++++ .../rag_with_react_with_observability/apis.py | 110 ------------------ .../models/__init__.py | 0 .../models/payload.py | 5 - .../readme.md | 17 +++ .../requirements.txt | 3 +- 55 files changed, 278 insertions(+), 677 deletions(-) delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py delete mode 100644 
bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/models/payload.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py create mode 100644 
bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/.env b/bootstraprag/templates/llamaindex/rag_with_flare/.env index 0184b1e..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_flare/.env +++ b/bootstraprag/templates/llamaindex/rag_with_flare/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py deleted file mode 100644 index 82f1533..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import APIRouter, Depends -from models.payload import Payload -from base_rag import 
BaseRAG - - -base_rag = BaseRAG(show_progress=True, data_path='data') - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = base_rag.query(query_string=payload.query) - return response diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_server.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_server.py new file mode 100644 index 0000000..d6175fe --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_flare/api_server.py @@ -0,0 +1,34 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from base_rag import BaseRAG +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.base_rag = None + + def setup(self, devices): + self.base_rag = BaseRAG(show_progress=True, data_path='data') + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + try: + return self.base_rag.query(query_string=query) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/apis.py b/bootstraprag/templates/llamaindex/rag_with_flare/apis.py deleted file mode 100644 index 3a885a5..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/apis.py +++ /dev/null @@ -1,110 +0,0 @@ -from fastapi import FastAPI, Request -from fastapi.openapi.utils import get_openapi -from api_routes.apis import router -from fastapi.middleware.cors import CORSMiddleware -import uvicorn -import logging -import time - -logging.basicConfig(level=logging.DEBUG) 
-logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = 
str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/readme.md b/bootstraprag/templates/llamaindex/rag_with_flare/readme.md index f4524f6..de1d9f8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_flare/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_flare/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` diff --git 
a/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt index 8d37fc9..a48fe28 100644 --- a/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt @@ -7,3 +7,4 @@ llama-index-embeddings-openai==0.1.11 llama-index-embeddings-ollama==0.1.2 llama-index-vector-stores-qdrant==0.2.14 pydantic==2.9.0 +litserve==0.2.2 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/.env b/bootstraprag/templates/llamaindex/rag_with_hyde/.env index 0184b1e..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/.env +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py deleted file mode 100644 index 82f1533..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import 
APIRouter, Depends -from models.payload import Payload -from base_rag import BaseRAG - - -base_rag = BaseRAG(show_progress=True, data_path='data') - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = base_rag.query(query_string=payload.query) - return response diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py new file mode 100644 index 0000000..d6175fe --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py @@ -0,0 +1,34 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from base_rag import BaseRAG +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.base_rag = None + + def setup(self, devices): + self.base_rag = BaseRAG(show_progress=True, data_path='data') + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + try: + return self.base_rag.query(query_string=query) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde/apis.py deleted file mode 100644 index 3a885a5..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/apis.py +++ /dev/null @@ -1,110 +0,0 @@ -from fastapi import FastAPI, Request -from fastapi.openapi.utils import get_openapi -from api_routes.apis import router -from fastapi.middleware.cors import CORSMiddleware -import uvicorn -import 
logging -import time - -logging.basicConfig(level=logging.DEBUG) -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = 
time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md b/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md index f4524f6..de1d9f8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": 
"explain mlops architecture" +} +``` diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt index f58d622..f3a6443 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt @@ -6,4 +6,5 @@ llama-index-llms-ollama==0.2.0 llama-index-embeddings-openai==0.1.11 llama-index-embeddings-ollama==0.1.2 llama-index-vector-stores-qdrant==0.2.14 -pydantic==2.9.0 \ No newline at end of file +pydantic==2.9.0 +litserve==0.2.2 \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env index c637c48..d6e9266 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/__init__.py deleted file mode 
100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py deleted file mode 100644 index 82f1533..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import APIRouter, Depends -from models.payload import Payload -from base_rag import BaseRAG - - -base_rag = BaseRAG(show_progress=True, data_path='data') - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = base_rag.query(query_string=payload.query) - return response diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py new file mode 100644 index 0000000..d6175fe --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py @@ -0,0 +1,34 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from base_rag import BaseRAG +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.base_rag = None + + def setup(self, devices): + self.base_rag = BaseRAG(show_progress=True, data_path='data') + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + try: + return self.base_rag.query(query_string=query) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) 
diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py deleted file mode 100644 index 3a885a5..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py +++ /dev/null @@ -1,110 +0,0 @@ -from fastapi import FastAPI, Request -from fastapi.openapi.utils import get_openapi -from api_routes.apis import router -from fastapi.middleware.cors import CORSMiddleware -import uvicorn -import logging -import time - -logging.basicConfig(level=logging.DEBUG) -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def 
log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md index d42c9f7..841341b 100644 --- 
a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md @@ -7,3 +7,20 @@ #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` - visit http://localhost:6006/ for all the observability + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt index 5e3353f..b5e7372 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt @@ -8,4 +8,5 @@ llama-index-embeddings-openai==0.2.4 llama-index-embeddings-ollama==0.3.0 llama-index-vector-stores-qdrant==0.3.0 llama-index-callbacks-arize-phoenix==0.2.1 -pydantic==2.9.0 \ No newline at end of file +pydantic==2.9.0 +litserve==0.2.2 \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_react/.env b/bootstraprag/templates/llamaindex/rag_with_react/.env index c637c48..d6e9266 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_react/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react/api_core/__init__.py deleted file mode 100644 index 
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from react_agent_with_query_engine import ReActWithQueryEngine
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    """LitServe API exposing the ReAct-agent RAG pipeline at /api/v1/chat-completion."""

    def __init__(self):
        # Engine is built lazily in setup() so each server worker constructs
        # its own instance after process spawn/fork.
        self.react_with_engine = None

    def setup(self, devices):
        """Build the query engine once per worker; 'data' is the corpus directory."""
        self.react_with_engine = ReActWithQueryEngine(input_dir='data', show_progress=True)

    def decode_request(self, request, **kwargs):
        # Payload contract: {"query": "..."}; malformed bodies raise KeyError here.
        return request["query"]

    def predict(self, query: str):
        """Run the RAG query; on failure return the error text instead of killing the worker."""
        try:
            # BUG FIX: the original called self.self_correcting_rag, an attribute
            # that never exists on this class (copy/paste from another template),
            # so every request crashed with AttributeError.
            return self.react_with_engine.query(user_query=query)
        except Exception as e:
            # e.args can be empty (e.g. `raise ValueError()`); fall back to str(e)
            # so error reporting itself cannot raise IndexError.
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults keep the server bootable when .env is missing; the original
    # int(None) workers / port=None would raise TypeError at startup.
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
-app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_react/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react/models/payload.py 
b/bootstraprag/templates/llamaindex/rag_with_react/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_react/readme.md b/bootstraprag/templates/llamaindex/rag_with_react/readme.md index f4524f6..de1d9f8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_react/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` diff --git a/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt index 648d693..85fa73f 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt @@ -6,6 +6,5 @@ llama-index-embeddings-openai==0.2.4 llama-index-embeddings-ollama==0.3.0 llama-index-vector-stores-qdrant==0.3.0 qdrant-client==1.11.1 -fastapi==0.112.1 -uvicorn==0.30.6 -pydantic==2.9.0 \ No newline at end of file +pydantic==2.9.0 +litserve==0.2.2 \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env index c637c48..d6e9266 100644 --- 
a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py deleted file mode 100644 index ac487dc..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import APIRouter, Depends -from models.payload import Payload -from react_agent_with_query_engine import ReActWithQueryEngine - - -react_with_engine = ReActWithQueryEngine(input_dir='data', show_progress=True) - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = react_with_engine.query(user_query=payload.query) - 
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from react_agent_with_query_engine import ReActWithQueryEngine
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    """LitServe API for the ReAct-agent RAG pipeline (observability template)."""

    def __init__(self):
        # Engine is built lazily in setup() so each server worker constructs
        # its own instance after process spawn/fork.
        self.react_with_engine = None

    def setup(self, devices):
        """Build the query engine once per worker; 'data' is the corpus directory."""
        self.react_with_engine = ReActWithQueryEngine(input_dir='data', show_progress=True)

    def decode_request(self, request, **kwargs):
        # Payload contract: {"query": "..."}; malformed bodies raise KeyError here.
        return request["query"]

    def predict(self, query: str):
        """Run the RAG query; on failure return the error text instead of killing the worker."""
        try:
            # BUG FIX: the original called self.self_correcting_rag, an attribute
            # that never exists on this class (copy/paste from another template),
            # so every request crashed with AttributeError.
            return self.react_with_engine.query(user_query=query)
        except Exception as e:
            # e.args can be empty (e.g. `raise ValueError()`); fall back to str(e)
            # so error reporting itself cannot raise IndexError.
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults keep the server bootable when .env is missing; the original
    # int(None) workers / port=None would raise TypeError at startup.
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
[%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} 
seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md index f4524f6..47189c8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: 
http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt index 9d52b18..8ed72be 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt @@ -2,9 +2,8 @@ python-dotenv==1.0.1 llama-index==0.11.7 arize-phoenix==4.33.1 qdrant-client==1.11.1 -fastapi==0.112.1 -uvicorn==0.30.6 pydantic==2.9.0 +litserve==0.2.2 llama-index-llms-openai==0.2.3 llama-index-llms-ollama==0.3.1 llama-index-embeddings-openai==0.2.4 From 26b1db50211b5ab322863453362eeb8de07518b9 Mon Sep 17 00:00:00 2001 From: pavanmantha Date: Sat, 28 Sep 2024 19:00:57 +0530 Subject: [PATCH 2/3] -modified env to include default key --- bootstraprag/templates/llamaindex/rag_with_react/.env | 2 +- .../templates/llamaindex/rag_with_react_with_observability/.env | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstraprag/templates/llamaindex/rag_with_react/.env b/bootstraprag/templates/llamaindex/rag_with_react/.env index d6e9266..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react/.env @@ -1,5 +1,5 @@ DB_URL='http://localhost:6333' -DB_API_KEY='' +DB_API_KEY='th3s3cr3tk3y' COLLECTION_NAME='YOUR_COLLECTION' OPENAI_API_KEY='' diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env index d6e9266..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env @@ -1,5 +1,5 @@ DB_URL='http://localhost:6333' 
-DB_API_KEY='' +DB_API_KEY='th3s3cr3tk3y' COLLECTION_NAME='YOUR_COLLECTION' OPENAI_API_KEY='' From f9114076b11ca8f8562b506f6ac5d69b15c59f2e Mon Sep 17 00:00:00 2001 From: pavanmantha Date: Sat, 28 Sep 2024 19:05:12 +0530 Subject: [PATCH 3/3] -fixed the react instance bug --- bootstraprag/templates/llamaindex/rag_with_react/api_server.py | 2 +- .../llamaindex/rag_with_react_with_observability/api_server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstraprag/templates/llamaindex/rag_with_react/api_server.py b/bootstraprag/templates/llamaindex/rag_with_react/api_server.py index fc058ef..42d66bd 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/api_server.py +++ b/bootstraprag/templates/llamaindex/rag_with_react/api_server.py @@ -19,7 +19,7 @@ def decode_request(self, request, **kwargs): def predict(self, query: str): try: - return self.self_correcting_rag.query(user_query=query) + return self.react_with_engine.query(user_query=query) except Exception as e: return e.args[0] diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py index fc058ef..42d66bd 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py @@ -19,7 +19,7 @@ def decode_request(self, request, **kwargs): def predict(self, query: str): try: - return self.self_correcting_rag.query(user_query=query) + return self.react_with_engine.query(user_query=query) except Exception as e: return e.args[0]