3 changes: 3 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/.env
@@ -19,3 +19,6 @@ WARN = 30
INFO = 20
DEBUG = 10
NOTSET = 0

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
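These settings are consumed at startup by the new `api_server.py`. A minimal sketch of how they are read, assuming python-dotenv is available and using the template defaults above as fallbacks:

```python
import os

from dotenv import load_dotenv, find_dotenv

# Load variables from the nearest .env file into the process environment
load_dotenv(find_dotenv())

# Fall back to the template defaults when the variables are unset
port = int(os.environ.get('LIT_SERVER_PORT', '8000'))
workers = int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4'))
```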
34 changes: 34 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/api_server.py
@@ -0,0 +1,34 @@
from abc import ABC
import os

from dotenv import load_dotenv, find_dotenv
import litserve as ls

from base_rag import BaseRAG

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    def __init__(self):
        super().__init__()
        self.base_rag = None

    def setup(self, devices):
        # Build the RAG pipeline once per worker, indexing documents under ./data
        self.base_rag = BaseRAG(show_progress=True, data_path='data')

    def decode_request(self, request, **kwargs):
        # The request body is expected to be {"query": "..."}
        return request["query"]

    def predict(self, query: str):
        try:
            return self.base_rag.query(query_string=query)
        except Exception as e:
            # Return the error message as the response payload instead of raising
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults mirror the values shipped in .env (4 workers, port 8000)
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
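One design note on `predict`: returning the exception message on failure means errors come back inside the normal `{'response': ...}` envelope with a 200 status rather than as HTTP errors; clients that need to distinguish failures may prefer re-raising so LitServe can surface a proper error response.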
110 changes: 0 additions & 110 deletions bootstraprag/templates/llamaindex/rag_with_flare/apis.py

This file was deleted.

17 changes: 17 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/readme.md
@@ -6,3 +6,20 @@
- Place your data files (preferably `.pdf`) in the `data` folder
#### Note: ensure Qdrant and Ollama (if your LLM models point to a local instance) are running
- run `python main.py`

### How to expose the RAG pipeline as an API
- run `python api_server.py`
- verify the Swagger and ReDoc documentation as below:
- open a browser and visit `http://localhost:8000/redoc`
- open a browser and visit `http://localhost:8000/docs`

### Payload Specification

- Method: POST
- Endpoint: `http://localhost:8000/api/v1/chat-completion`
- Body:
```json
{
"query": "explain mlops architecture"
}
```
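A quick way to exercise the endpoint from Python, sketched with the `requests` library (assumed to be installed; the URL and payload are taken from the spec above):

```python
import requests

# POST the query payload to the chat-completion endpoint
resp = requests.post(
    'http://localhost:8000/api/v1/chat-completion',
    json={'query': 'explain mlops architecture'},
    timeout=120,  # the first query may be slow while the index warms up
)
resp.raise_for_status()
print(resp.json()['response'])  # the server wraps output as {"response": ...}
```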
1 change: 1 addition & 0 deletions bootstraprag/templates/llamaindex/rag_with_flare/requirements.txt
@@ -7,3 +7,4 @@ llama-index-embeddings-openai==0.1.11
llama-index-embeddings-ollama==0.1.2
llama-index-vector-stores-qdrant==0.2.14
pydantic==2.9.0
litserve==0.2.2
3 changes: 3 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_hyde/.env
@@ -19,3 +19,6 @@ WARN = 30
INFO = 20
DEBUG = 10
NOTSET = 0

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
14 changes: 0 additions & 14 deletions bootstraprag/templates/llamaindex/rag_with_hyde/api_routes/apis.py

This file was deleted.

34 changes: 34 additions & 0 deletions bootstraprag/templates/llamaindex/rag_with_hyde/api_server.py
@@ -0,0 +1,34 @@
from abc import ABC
import os

from dotenv import load_dotenv, find_dotenv
import litserve as ls

from base_rag import BaseRAG

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
    def __init__(self):
        super().__init__()
        self.base_rag = None

    def setup(self, devices):
        # Build the RAG pipeline once per worker, indexing documents under ./data
        self.base_rag = BaseRAG(show_progress=True, data_path='data')

    def decode_request(self, request, **kwargs):
        # The request body is expected to be {"query": "..."}
        return request["query"]

    def predict(self, query: str):
        try:
            return self.base_rag.query(query_string=query)
        except Exception as e:
            # Return the error message as the response payload instead of raising
            return e.args[0] if e.args else str(e)

    def encode_response(self, output, **kwargs):
        return {'response': output}


if __name__ == '__main__':
    api = ReactRAGServingAPI()
    # Defaults mirror the values shipped in .env (4 workers, port 8000)
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '4')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', '8000')))
110 changes: 0 additions & 110 deletions bootstraprag/templates/llamaindex/rag_with_hyde/apis.py

This file was deleted.
