From 545b30126a579bc5f89c0692593ee9cb08a2efde Mon Sep 17 00:00:00 2001 From: pavanmantha Date: Sat, 28 Sep 2024 18:46:39 +0530 Subject: [PATCH 1/3] -modified fastapi to litserve --- .../templates/llamaindex/rag_with_flare/.env | 3 + .../rag_with_flare/api_core/__init__.py | 0 .../rag_with_flare/api_core/config.py | 5 - .../rag_with_flare/api_routes/__init__.py | 0 .../rag_with_flare/api_routes/apis.py | 14 --- .../llamaindex/rag_with_flare/api_server.py | 34 ++++++ .../llamaindex/rag_with_flare/apis.py | 110 ------------------ .../rag_with_flare/models/__init__.py | 0 .../rag_with_flare/models/payload.py | 5 - .../llamaindex/rag_with_flare/readme.md | 17 +++ .../rag_with_flare/requirements.txt | 1 + .../templates/llamaindex/rag_with_hyde/.env | 3 + .../rag_with_hyde/api_core/__init__.py | 0 .../rag_with_hyde/api_core/config.py | 5 - .../rag_with_hyde/api_routes/__init__.py | 0 .../rag_with_hyde/api_routes/apis.py | 14 --- .../llamaindex/rag_with_hyde/api_server.py | 34 ++++++ .../llamaindex/rag_with_hyde/apis.py | 110 ------------------ .../rag_with_hyde/models/__init__.py | 0 .../rag_with_hyde/models/payload.py | 5 - .../llamaindex/rag_with_hyde/readme.md | 17 +++ .../llamaindex/rag_with_hyde/requirements.txt | 3 +- .../rag_with_hyde_with_observability/.env | 3 + .../api_core/__init__.py | 0 .../api_core/config.py | 5 - .../api_routes/__init__.py | 0 .../api_routes/apis.py | 14 --- .../api_server.py | 34 ++++++ .../rag_with_hyde_with_observability/apis.py | 110 ------------------ .../models/__init__.py | 0 .../models/payload.py | 5 - .../readme.md | 17 +++ .../requirements.txt | 3 +- .../templates/llamaindex/rag_with_react/.env | 3 + .../rag_with_react/api_core/__init__.py | 0 .../rag_with_react/api_core/config.py | 5 - .../rag_with_react/api_routes/__init__.py | 0 .../rag_with_react/api_routes/apis.py | 14 --- .../llamaindex/rag_with_react/api_server.py | 34 ++++++ .../llamaindex/rag_with_react/apis.py | 110 ------------------ 
.../rag_with_react/models/__init__.py | 0 .../rag_with_react/models/payload.py | 5 - .../llamaindex/rag_with_react/readme.md | 17 +++ .../rag_with_react/requirements.txt | 5 +- .../rag_with_react_with_observability/.env | 3 + .../api_core/__init__.py | 0 .../api_core/config.py | 5 - .../api_routes/__init__.py | 0 .../api_routes/apis.py | 14 --- .../api_server.py | 34 ++++++ .../rag_with_react_with_observability/apis.py | 110 ------------------ .../models/__init__.py | 0 .../models/payload.py | 5 - .../readme.md | 17 +++ .../requirements.txt | 3 +- 55 files changed, 278 insertions(+), 677 deletions(-) delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py delete mode 100644 
bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_routes/apis.py create mode 100644 bootstraprag/templates/llamaindex/rag_with_react/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react/models/payload.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py create mode 100644 
bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/apis.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/__init__.py delete mode 100644 bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/.env b/bootstraprag/templates/llamaindex/rag_with_flare/.env index 0184b1e..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_flare/.env +++ b/bootstraprag/templates/llamaindex/rag_with_flare/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py deleted file mode 100644 index 82f1533..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import APIRouter, Depends -from models.payload import Payload -from base_rag import 
BaseRAG - - -base_rag = BaseRAG(show_progress=True, data_path='data') - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = base_rag.query(query_string=payload.query) - return response diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/api_server.py b/bootstraprag/templates/llamaindex/rag_with_flare/api_server.py new file mode 100644 index 0000000..d6175fe --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_flare/api_server.py @@ -0,0 +1,34 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from base_rag import BaseRAG +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.base_rag = None + + def setup(self, devices): + self.base_rag = BaseRAG(show_progress=True, data_path='data') + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + try: + return self.base_rag.query(query_string=query) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/apis.py b/bootstraprag/templates/llamaindex/rag_with_flare/apis.py deleted file mode 100644 index 3a885a5..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/apis.py +++ /dev/null @@ -1,110 +0,0 @@ -from fastapi import FastAPI, Request -from fastapi.openapi.utils import get_openapi -from api_routes.apis import router -from fastapi.middleware.cors import CORSMiddleware -import uvicorn -import logging -import time - -logging.basicConfig(level=logging.DEBUG) 
-logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = 
str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_flare/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_flare/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_flare/readme.md b/bootstraprag/templates/llamaindex/rag_with_flare/readme.md index f4524f6..de1d9f8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_flare/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_flare/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` diff --git 
a/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt index 8d37fc9..a48fe28 100644 --- a/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt @@ -7,3 +7,4 @@ llama-index-embeddings-openai==0.1.11 llama-index-embeddings-ollama==0.1.2 llama-index-vector-stores-qdrant==0.2.14 pydantic==2.9.0 +litserve==0.2.2 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/.env b/bootstraprag/templates/llamaindex/rag_with_hyde/.env index 0184b1e..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/.env +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py deleted file mode 100644 index 82f1533..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import 
APIRouter, Depends -from models.payload import Payload -from base_rag import BaseRAG - - -base_rag = BaseRAG(show_progress=True, data_path='data') - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = base_rag.query(query_string=payload.query) - return response diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py b/bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py new file mode 100644 index 0000000..d6175fe --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py @@ -0,0 +1,34 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from base_rag import BaseRAG +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.base_rag = None + + def setup(self, devices): + self.base_rag = BaseRAG(show_progress=True, data_path='data') + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + try: + return self.base_rag.query(query_string=query) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde/apis.py deleted file mode 100644 index 3a885a5..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/apis.py +++ /dev/null @@ -1,110 +0,0 @@ -from fastapi import FastAPI, Request -from fastapi.openapi.utils import get_openapi -from api_routes.apis import router -from fastapi.middleware.cors import CORSMiddleware -import uvicorn -import 
logging -import time - -logging.basicConfig(level=logging.DEBUG) -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = 
time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md b/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md index f4524f6..de1d9f8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": 
"explain mlops architecture" +} +``` diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt index f58d622..f3a6443 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_hyde/requirements.txt @@ -6,4 +6,5 @@ llama-index-llms-ollama==0.2.0 llama-index-embeddings-openai==0.1.11 llama-index-embeddings-ollama==0.1.2 llama-index-vector-stores-qdrant==0.2.14 -pydantic==2.9.0 \ No newline at end of file +pydantic==2.9.0 +litserve==0.2.2 \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env index c637c48..d6e9266 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/__init__.py deleted file mode 
100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py deleted file mode 100644 index 82f1533..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import APIRouter, Depends -from models.payload import Payload -from base_rag import BaseRAG - - -base_rag = BaseRAG(show_progress=True, data_path='data') - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = base_rag.query(query_string=payload.query) - return response diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py new file mode 100644 index 0000000..d6175fe --- /dev/null +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/api_server.py @@ -0,0 +1,34 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from base_rag import BaseRAG +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.base_rag = None + + def setup(self, devices): + self.base_rag = BaseRAG(show_progress=True, data_path='data') + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + try: + return self.base_rag.query(query_string=query) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) 
diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py deleted file mode 100644 index 3a885a5..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/apis.py +++ /dev/null @@ -1,110 +0,0 @@ -from fastapi import FastAPI, Request -from fastapi.openapi.utils import get_openapi -from api_routes.apis import router -from fastapi.middleware.cors import CORSMiddleware -import uvicorn -import logging -import time - -logging.basicConfig(level=logging.DEBUG) -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def 
log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md index d42c9f7..841341b 100644 --- 
a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/readme.md @@ -7,3 +7,20 @@ #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` - visit http://localhost:6006/ for all the observability + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` diff --git a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt index 5e3353f..b5e7372 100644 --- a/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_hyde_with_observability/requirements.txt @@ -8,4 +8,5 @@ llama-index-embeddings-openai==0.2.4 llama-index-embeddings-ollama==0.3.0 llama-index-vector-stores-qdrant==0.3.0 llama-index-callbacks-arize-phoenix==0.2.1 -pydantic==2.9.0 \ No newline at end of file +pydantic==2.9.0 +litserve==0.2.2 \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_react/.env b/bootstraprag/templates/llamaindex/rag_with_react/.env index c637c48..d6e9266 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_react/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react/api_core/__init__.py deleted file mode 100644 index 
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from react_agent_with_query_engine import ReActWithQueryEngine
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    """LitServe API exposing the ReAct-agent RAG pipeline at /api/v1/chat-completion."""

    def __init__(self):
        # Engine is built lazily in setup() so each server worker constructs
        # its own instance after process spawn/fork.
        self.react_with_engine = None

    def setup(self, devices):
        """Build the query engine once per worker; 'data' is the corpus directory."""
        self.react_with_engine = ReActWithQueryEngine(input_dir='data', show_progress=True)

    def decode_request(self, request, **kwargs):
        # Payload contract: {"query": "..."}; malformed bodies raise KeyError here.
        return request["query"]

    def predict(self, query: str):
        """Run the RAG query; on failure return the error text instead of killing the worker."""
        try:
            # BUG FIX: the original called self.self_correcting_rag, an attribute
            # that never exists on this class (copy/paste from another template),
            # so every request crashed with AttributeError.
            return self.react_with_engine.query(user_query=query)
        except Exception as e:
            # e.args can be empty (e.g. `raise ValueError()`); fall back to str(e)
            # so error reporting itself cannot raise IndexError.
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults keep the server bootable when .env is missing; the original
    # int(None) workers / port=None would raise TypeError at startup.
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
-app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_react/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react/models/payload.py 
b/bootstraprag/templates/llamaindex/rag_with_react/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_react/readme.md b/bootstraprag/templates/llamaindex/rag_with_react/readme.md index f4524f6..de1d9f8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_react/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` diff --git a/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt index 648d693..85fa73f 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_react/requirements.txt @@ -6,6 +6,5 @@ llama-index-embeddings-openai==0.2.4 llama-index-embeddings-ollama==0.3.0 llama-index-vector-stores-qdrant==0.3.0 qdrant-client==1.11.1 -fastapi==0.112.1 -uvicorn==0.30.6 -pydantic==2.9.0 \ No newline at end of file +pydantic==2.9.0 +litserve==0.2.2 \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env index c637c48..d6e9266 100644 --- 
a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env @@ -19,3 +19,6 @@ WARN = 30 INFO = 20 DEBUG = 10 NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py deleted file mode 100644 index d944427..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_core/config.py +++ /dev/null @@ -1,5 +0,0 @@ -class Settings: - PROJECT_NAME: str = "Simple RAG as FastAPI Application" - - -settings = Settings() diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py deleted file mode 100644 index ac487dc..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_routes/apis.py +++ /dev/null @@ -1,14 +0,0 @@ -from fastapi import APIRouter, Depends -from models.payload import Payload -from react_agent_with_query_engine import ReActWithQueryEngine - - -react_with_engine = ReActWithQueryEngine(input_dir='data', show_progress=True) - -router = APIRouter(prefix="/api/v1/rag", tags=["rag"]) - - -@router.post(path='/query') -def fetch_response(payload: Payload): - response = react_with_engine.query(user_query=payload.query) - 
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from react_agent_with_query_engine import ReActWithQueryEngine
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    """LitServe API for the ReAct-agent RAG pipeline (observability template)."""

    def __init__(self):
        # Engine is built lazily in setup() so each server worker constructs
        # its own instance after process spawn/fork.
        self.react_with_engine = None

    def setup(self, devices):
        """Build the query engine once per worker; 'data' is the corpus directory."""
        self.react_with_engine = ReActWithQueryEngine(input_dir='data', show_progress=True)

    def decode_request(self, request, **kwargs):
        # Payload contract: {"query": "..."}; malformed bodies raise KeyError here.
        return request["query"]

    def predict(self, query: str):
        """Run the RAG query; on failure return the error text instead of killing the worker."""
        try:
            # BUG FIX: the original called self.self_correcting_rag, an attribute
            # that never exists on this class (copy/paste from another template),
            # so every request crashed with AttributeError.
            return self.react_with_engine.query(user_query=query)
        except Exception as e:
            # e.args can be empty (e.g. `raise ValueError()`); fall back to str(e)
            # so error reporting itself cannot raise IndexError.
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults keep the server bootable when .env is missing; the original
    # int(None) workers / port=None would raise TypeError at startup.
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
[%(levelname)s] %(name)s: %(message)s", -) -logger = logging.getLogger(__name__) -allowed_origins = [ - "*" -] - -app = FastAPI( - title="My FastAPI Application", - description="This is a FastAPI implementation for RAG application with Swagger UI configurations.", - version="1.0.0", - docs_url="/documentation", - redoc_url="/redoc", - openapi_url="/openapi.json", - contact={ - "name": "M K Pavan Kumar", - "linkedin": "https://www.linkedin.com", - }, - license_info={ - "name": "MIT License", - "url": "https://opensource.org/licenses/MIT", - }, - terms_of_service="https://www.yourwebsite.com/terms/", -) -app.add_middleware( - CORSMiddleware, - allow_origins=allowed_origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) -app.include_router(router) - - -# Custom OpenAPI schema generation (optional) -def custom_openapi(): - if app.openapi_schema: - return app.openapi_schema - openapi_schema = get_openapi( - title="RAG APIs", - version="1.0.0", - description="This is a custom OpenAPI schema with additional metadata.", - routes=app.routes, - tags=[ - { - "name": "rag", - "description": "Operations for RAG query.", - } - ], - ) - # Modify openapi_schema as needed - app.openapi_schema = openapi_schema - return app.openapi_schema - - -app.openapi = custom_openapi - - -@app.middleware("http") -async def log_requests(request: Request, call_next): - try: - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - except Exception as e: - logger.exception(f"Error processing request: {e}") - raise e - - -# Request Timing Middleware -@app.middleware("http") -async def add_process_time_header(request: Request, call_next): - start_time = time.time() - response = await call_next(request) - process_time = time.time() - start_time - response.headers["X-Process-Time"] = str(process_time) - logger.info(f"Processed in {process_time:.4f} 
seconds") - return response - - -# Logging Middleware -@app.middleware("http") -async def log_requests(request: Request, call_next): - logger.info(f"Incoming request: {request.method} {request.url}") - response = await call_next(request) - logger.info(f"Response status: {response.status_code}") - return response - - -if __name__ == "__main__": - uvicorn.run( - "apis:app", - host="127.0.0.1", - port=8000, - reload=True, - log_level="info", - workers=1, - ) diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/__init__.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py deleted file mode 100644 index ae09aba..0000000 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/models/payload.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class Payload(BaseModel): - query: str diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md index f4524f6..47189c8 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/readme.md @@ -6,3 +6,20 @@ - In the data folder place your data preferably any ".pdf" #### Note: ensure your qdrant and ollama (if LLM models are pointing to local) are running - run `python main.py` + +### How to expose RAG as API +- run `python api_server.py` +- verify the swagger redoc and documentation as below +- open browser and hit `http://localhost:8000/redoc` +- open browser and hit `http://localhost:8000/docs` + +### Payload Specification + +- Method: POST +- API: 
http://localhost:8000/api/v1/chat-completion +- Body: +```json +{ + "query": "explain mlops architecture" +} +``` \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt index 9d52b18..8ed72be 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/requirements.txt @@ -2,9 +2,8 @@ python-dotenv==1.0.1 llama-index==0.11.7 arize-phoenix==4.33.1 qdrant-client==1.11.1 -fastapi==0.112.1 -uvicorn==0.30.6 pydantic==2.9.0 +litserve==0.2.2 llama-index-llms-openai==0.2.3 llama-index-llms-ollama==0.3.1 llama-index-embeddings-openai==0.2.4 From 26b1db50211b5ab322863453362eeb8de07518b9 Mon Sep 17 00:00:00 2001 From: pavanmantha Date: Sat, 28 Sep 2024 19:00:57 +0530 Subject: [PATCH 2/3] -modified env to include default key --- bootstraprag/templates/llamaindex/rag_with_react/.env | 2 +- .../templates/llamaindex/rag_with_react_with_observability/.env | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstraprag/templates/llamaindex/rag_with_react/.env b/bootstraprag/templates/llamaindex/rag_with_react/.env index d6e9266..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react/.env @@ -1,5 +1,5 @@ DB_URL='http://localhost:6333' -DB_API_KEY='' +DB_API_KEY='th3s3cr3tk3y' COLLECTION_NAME='YOUR_COLLECTION' OPENAI_API_KEY='' diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env index d6e9266..88b8230 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/.env @@ -1,5 +1,5 @@ DB_URL='http://localhost:6333' 
-DB_API_KEY='' +DB_API_KEY='th3s3cr3tk3y' COLLECTION_NAME='YOUR_COLLECTION' OPENAI_API_KEY='' From f9114076b11ca8f8562b506f6ac5d69b15c59f2e Mon Sep 17 00:00:00 2001 From: pavanmantha Date: Sat, 28 Sep 2024 19:05:12 +0530 Subject: [PATCH 3/3] -fixed the react instance bug --- bootstraprag/templates/llamaindex/rag_with_react/api_server.py | 2 +- .../llamaindex/rag_with_react_with_observability/api_server.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstraprag/templates/llamaindex/rag_with_react/api_server.py b/bootstraprag/templates/llamaindex/rag_with_react/api_server.py index fc058ef..42d66bd 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react/api_server.py +++ b/bootstraprag/templates/llamaindex/rag_with_react/api_server.py @@ -19,7 +19,7 @@ def decode_request(self, request, **kwargs): def predict(self, query: str): try: - return self.self_correcting_rag.query(user_query=query) + return self.react_with_engine.query(user_query=query) except Exception as e: return e.args[0] diff --git a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py index fc058ef..42d66bd 100644 --- a/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py +++ b/bootstraprag/templates/llamaindex/rag_with_react_with_observability/api_server.py @@ -19,7 +19,7 @@ def decode_request(self, request, **kwargs): def predict(self, query: str): try: - return self.self_correcting_rag.query(user_query=query) + return self.react_with_engine.query(user_query=query) except Exception as e: return e.args[0]