Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions bootstraprag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,23 @@ def create(project_name, framework, template, observability):
]
elif framework == 'standalone-qdrant':
framework = 'qdrant'
template_choices = ['simple-search', 'multimodal-search', 'hybrid-search', 'hybrid-search-advanced',
'retrieval-quality', 'semantic-cache']
template_choices = [
'simple-search',
'multimodal-search',
'hybrid-search',
'hybrid-search-advanced',
'retrieval-quality',
'semantic-cache',
'semantic-routing'
]
elif framework == 'standalone-evaluations':
framework = 'evaluations'
template_choices = ['deep-evals', 'mlflow-evals', 'phoenix-evals', 'ragas-evals']
template_choices = [
'deep-evals',
'mlflow-evals',
'phoenix-evals',
'ragas-evals'
]
# Use InquirerPy to select template with arrow keys
template = inquirer.select(
message="Which template would you like to use?",
Expand Down
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
### Work in progress
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
llama-index-agent-introspective
llama-index-llms-openai
llama-index-llms-ollama
llama-index-embeddings-ollama
llama-index-vector-stores-qdrant
llama-index-program-openai
llama-index-readers-file
yfinance
pandas
python-dotenv
7 changes: 6 additions & 1 deletion bootstraprag/templates/qdrant/semantic_cache/.env
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@ QDRANT_URL='http://localhost:6333'
QDRANT_API_KEY='th3s3cr3tk3y'

OLLAMA_MODEL='llama3.2:latest'
OLLAMA_BASE_URL='http://localhost:11434'
OLLAMA_BASE_URL='http://localhost:11434'

model_name_or_path='all-MiniLM-L6-v2'

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
23 changes: 17 additions & 6 deletions bootstraprag/templates/qdrant/semantic_cache/api_server.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
from abc import ABC
from semantic_cache import SemanticCache, compute_response
import litserve as ls
from dotenv import load_dotenv, find_dotenv
import os


class SemanticCacheAPI(ls.LitAPI, ABC):
    """LitServe API exposing SemanticCache as a chat-completion endpoint.

    Request payload:  {"question": "<user query>"}
    Response payload: {"response": "<cached or freshly computed answer>"}
    """

    def __init__(self):
        # Load .env settings (QDRANT_URL, model_name_or_path, ...) before
        # setup() constructs the cache, which reads them from os.environ.
        load_dotenv(find_dotenv())
        # Built in setup(); None until LitServe initializes this worker.
        self.semantic_cache = None

    def setup(self, device):
        """Create the semantic cache once per worker (``device`` is unused)."""
        self.semantic_cache = SemanticCache()

    def decode_request(self, request, **kwargs):
        """Extract the user's question from the JSON payload.

        Raises KeyError when the payload has no 'question' field.
        """
        return request['question']

    def predict(self, query, **kwargs):
        """Answer from the cache, falling back to compute_response on a miss."""
        return self.semantic_cache.get_response(query=query, compute_response_func=compute_response)

    def encode_response(self, output, **kwargs):
        """Wrap the answer in the JSON response envelope."""
        return {"response": output}


if __name__ == '__main__':
    api = SemanticCacheAPI()
    # Defaults mirror the template .env (port 8000, 4 workers) so the server
    # still boots when the variables are unset instead of raising TypeError
    # from int(None).
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 4)))
    # LitServer expects an int port; os.environ values are strings, so cast.
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))
17 changes: 17 additions & 0 deletions bootstraprag/templates/qdrant/semantic_cache/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# Call the semantic-cache server started by api_server.py: it serves at
# /api/v1/chat-completion and decode_request() reads the 'question' key,
# so the previous /predict + {"input": 4.0} request could never reach it.
response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"question": "What is the capital of France?"},
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
14 changes: 13 additions & 1 deletion bootstraprag/templates/qdrant/semantic_cache/readme.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
## Qdrant Semantic Cache
Semantic Cache is a superfast caching mechanism based on contextual meaning. It is very useful for making an LLM return the same response to semantically similar queries, without much deviation.

### How to run
- `pip install -r requirements.txt`
- `python semantic_cache.py`
- `python semantic_cache.py`

### Expose Semantic Cache as API
- `python api_server.py`
```text
API: http://localhost:8000/api/v1/chat-completion
Method: POST
payload: {
"question": "what is the capital of India?"
}
```
10 changes: 5 additions & 5 deletions bootstraprag/templates/qdrant/semantic_cache/semantic_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, threshold=0.35):
# load the data from env
load_dotenv(find_dotenv())

self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
self.encoder = SentenceTransformer(model_name_or_path=os.environ.get('model_name_or_path'))
self.cache_client = QdrantClient(url=os.environ.get('QDRANT_URL'), api_key=os.environ.get('QDRANT_API_KEY'))
self.cache_collection_name = "cache"
self.threshold = threshold
Expand Down Expand Up @@ -78,7 +78,7 @@ def compute_response(query: str):
return f"Computed response for: {query} is {assistant_message}"


semantic_cache = SemanticCache(threshold=0.8)
query = "What is the capital of France?"
response = semantic_cache.get_response(query, compute_response)
print(response)
# semantic_cache = SemanticCache(threshold=0.8)
# query = "What is the capital of France?"
# response = semantic_cache.get_response(query, compute_response)
# print(response)
5 changes: 4 additions & 1 deletion bootstraprag/templates/qdrant/semantic_routing/.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
encoder_model='sentence-transformers/all-MiniLM-L6-v2'
qdrant_api_key='th3s3cr3tk3y'
qdrant_url='http://localhost:6333/'
qdrant_url='http://localhost:6333/'

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
50 changes: 44 additions & 6 deletions bootstraprag/templates/qdrant/semantic_routing/api_server.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,57 @@
from abc import ABC

from semantic_router import Route

from semantic_routing_core import SemanticRouter
import litserve as ls
import os


class SemanticRoutingAPI(ls.LitAPI, ABC):
    """LitServe API that classifies an incoming question into a semantic route.

    Request payload:  {"question": "<user query>"}
    Response payload: {"response": <routing decision from SemanticRouter>}
    """

    def __init__(self):
        # Built in setup(); None until LitServe initializes this worker.
        self.semantic_routing_core = None

        # Demo routes: each utterance list acts as few-shot anchors that the
        # router embeds and matches queries against.
        politics = Route(
            name="politics",
            utterances=[
                "isn't politics the best thing ever",
                "why don't you tell me about your political opinions",
                "don't you just love the president",
                "they're going to destroy this country!",
                "they will save the country!",
            ],
        )

        chitchat = Route(
            name="chitchat",
            utterances=[
                "how's the weather today?",
                "how are things going?",
                "lovely weather today",
                "the weather is horrendous",
                "let's go to the chippy",
            ],
        )

        self.routes = [politics, chitchat]

    def setup(self, device):
        """Create the router and register the routes once per worker."""
        self.semantic_routing_core = SemanticRouter()
        self.semantic_routing_core.setup_routes(self.routes)

    def decode_request(self, request, **kwargs):
        """Extract the user's question from the JSON payload.

        Raises KeyError when the payload has no 'question' field.
        """
        return request['question']

    def predict(self, query, **kwargs):
        """Route the query to the best-matching semantic route."""
        return self.semantic_routing_core.route_query(query=query)

    def encode_response(self, output, **kwargs):
        """Wrap the routing decision in the JSON response envelope."""
        return {'response': output}


if __name__ == '__main__':
    api = SemanticRoutingAPI()
    # Defaults mirror the template .env (port 8000, 4 workers) so the server
    # still boots when the variables are unset instead of raising TypeError
    # from int(None).
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 4)))
    # LitServer expects an int port; os.environ values are strings, so cast.
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))
17 changes: 17 additions & 0 deletions bootstraprag/templates/qdrant/semantic_routing/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# Call the semantic-routing server started by api_server.py: it serves at
# /api/v1/chat-completion and decode_request() reads the 'question' key,
# so the previous /predict + {"input": 4.0} request could never reach it.
response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"question": "what is the Weather today?"},
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
12 changes: 11 additions & 1 deletion bootstraprag/templates/qdrant/semantic_routing/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,14 @@ Semantic Router is a superfast decision-making layer for your LLMs and agents. R

### How to execute code
1. `pip install -r requirements.txt`
2. `python main.py`
2. `python main.py`

### Expose Semantic Router as API
- `python api_server.py`
```text
API: http://localhost:8000/api/v1/chat-completion
Method: POST
payload: {
"question": "what is the Weather today?"
}
```
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='bootstrap-rag',
version='0.0.13',
version='0.0.14',
long_description=long_description,
long_description_content_type="text/markdown",
packages=find_packages(),
Expand Down