Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions bootstraprag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,23 @@ def create(project_name, framework, template, observability):
]
elif framework == 'standalone-qdrant':
framework = 'qdrant'
template_choices = ['simple-search', 'multimodal-search', 'hybrid-search', 'hybrid-search-advanced',
'retrieval-quality', 'semantic-cache']
template_choices = [
'simple-search',
'multimodal-search',
'hybrid-search',
'hybrid-search-advanced',
'retrieval-quality',
'semantic-cache',
'semantic-routing'
]
elif framework == 'standalone-evaluations':
framework = 'evaluations'
template_choices = ['deep-evals', 'mlflow-evals', 'phoenix-evals', 'ragas-evals']
template_choices = [
'deep-evals',
'mlflow-evals',
'phoenix-evals',
'ragas-evals'
]
# Use InquirerPy to select template with arrow keys
template = inquirer.select(
message="Which template would you like to use?",
Expand Down
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
### Work in progress
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
llama-index-agent-introspective
llama-index-llms-openai
llama-index-llms-ollama
llama-index-embeddings-ollama
llama-index-vector-stores-qdrant
llama-index-program-openai
llama-index-readers-file
yfinance
pandas
python-dotenv
7 changes: 6 additions & 1 deletion bootstraprag/templates/qdrant/semantic_cache/.env
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@ QDRANT_URL='http://localhost:6333'
QDRANT_API_KEY='th3s3cr3tk3y'

OLLAMA_MODEL='llama3.2:latest'
OLLAMA_BASE_URL='http://localhost:11434'
OLLAMA_BASE_URL='http://localhost:11434'

model_name_or_path='all-MiniLM-L6-v2'

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
23 changes: 17 additions & 6 deletions bootstraprag/templates/qdrant/semantic_cache/api_server.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
from abc import ABC
from semantic_cache import SemanticCache, compute_response
import litserve as ls
from dotenv import load_dotenv, find_dotenv
import os


class SemanticCacheAPI(ls.LitAPI, ABC):
    """LitServe API exposing SemanticCache as a chat-completion endpoint.

    Request payload:  {"question": "<user query>"}
    Response payload: {"response": "<cached or freshly computed answer>"}
    """

    def __init__(self):
        # Load .env settings (QDRANT_URL, model_name_or_path, ...) before
        # setup() constructs the cache, which reads them from os.environ.
        load_dotenv(find_dotenv())
        # Built in setup(); None until LitServe initializes this worker.
        self.semantic_cache = None

    def setup(self, device):
        """Create the semantic cache once per worker (``device`` is unused)."""
        self.semantic_cache = SemanticCache()

    def decode_request(self, request, **kwargs):
        """Extract the user's question from the JSON payload.

        Raises KeyError when the payload has no 'question' field.
        """
        return request['question']

    def predict(self, query, **kwargs):
        """Answer from the cache, falling back to compute_response on a miss."""
        return self.semantic_cache.get_response(query=query, compute_response_func=compute_response)

    def encode_response(self, output, **kwargs):
        """Wrap the answer in the JSON response envelope."""
        return {"response": output}


if __name__ == '__main__':
    api = SemanticCacheAPI()
    # Defaults mirror the template .env (port 8000, 4 workers) so the server
    # still boots when the variables are unset instead of raising TypeError
    # from int(None).
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 4)))
    # LitServer expects an int port; os.environ values are strings, so cast.
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))
17 changes: 17 additions & 0 deletions bootstraprag/templates/qdrant/semantic_cache/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# Call the semantic-cache server started by api_server.py: it serves at
# /api/v1/chat-completion and decode_request() reads the 'question' key,
# so the previous /predict + {"input": 4.0} request could never reach it.
response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"question": "What is the capital of France?"},
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
14 changes: 13 additions & 1 deletion bootstraprag/templates/qdrant/semantic_cache/readme.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
## Qdrant Semantic Cache
Semantic Cache is a superfast caching mechanism based on contextual meaning. It is very useful for making an LLM return the same response to semantically similar queries, without much deviation.

### How to run
- `pip install -r requirements.txt`
- `python semantic_cache.py`
- `python semantic_cache.py`

### Expose Semantic Cache as API
- `python api_server.py`
```text
API: http://localhost:8000/api/v1/chat-completion
Method: POST
payload: {
"question": "what is the capital of India?"
}
```
10 changes: 5 additions & 5 deletions bootstraprag/templates/qdrant/semantic_cache/semantic_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, threshold=0.35):
# load the data from env
load_dotenv(find_dotenv())

self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
self.encoder = SentenceTransformer(model_name_or_path=os.environ.get('model_name_or_path'))
self.cache_client = QdrantClient(url=os.environ.get('QDRANT_URL'), api_key=os.environ.get('QDRANT_API_KEY'))
self.cache_collection_name = "cache"
self.threshold = threshold
Expand Down Expand Up @@ -78,7 +78,7 @@ def compute_response(query: str):
return f"Computed response for: {query} is {assistant_message}"


semantic_cache = SemanticCache(threshold=0.8)
query = "What is the capital of France?"
response = semantic_cache.get_response(query, compute_response)
print(response)
# semantic_cache = SemanticCache(threshold=0.8)
# query = "What is the capital of France?"
# response = semantic_cache.get_response(query, compute_response)
# print(response)
5 changes: 4 additions & 1 deletion bootstraprag/templates/qdrant/semantic_routing/.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
encoder_model='sentence-transformers/all-MiniLM-L6-v2'
qdrant_api_key='th3s3cr3tk3y'
qdrant_url='http://localhost:6333/'
qdrant_url='http://localhost:6333/'

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4
50 changes: 44 additions & 6 deletions bootstraprag/templates/qdrant/semantic_routing/api_server.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,57 @@
from abc import ABC

from semantic_router import Route

from semantic_routing_core import SemanticRouter
import litserve as ls
import os


class SemanticRoutingAPI(ls.LitAPI, ABC):
    """LitServe API that classifies an incoming question into a semantic route.

    Request payload:  {"question": "<user query>"}
    Response payload: {"response": <routing decision from SemanticRouter>}
    """

    def __init__(self):
        # Built in setup(); None until LitServe initializes this worker.
        self.semantic_routing_core = None

        # Demo routes: each utterance list acts as few-shot anchors that the
        # router embeds and matches queries against.
        politics = Route(
            name="politics",
            utterances=[
                "isn't politics the best thing ever",
                "why don't you tell me about your political opinions",
                "don't you just love the president",
                "they're going to destroy this country!",
                "they will save the country!",
            ],
        )

        chitchat = Route(
            name="chitchat",
            utterances=[
                "how's the weather today?",
                "how are things going?",
                "lovely weather today",
                "the weather is horrendous",
                "let's go to the chippy",
            ],
        )

        self.routes = [politics, chitchat]

    def setup(self, device):
        """Create the router and register the routes once per worker."""
        self.semantic_routing_core = SemanticRouter()
        self.semantic_routing_core.setup_routes(self.routes)

    def decode_request(self, request, **kwargs):
        """Extract the user's question from the JSON payload.

        Raises KeyError when the payload has no 'question' field.
        """
        return request['question']

    def predict(self, query, **kwargs):
        """Route the query to the best-matching semantic route."""
        return self.semantic_routing_core.route_query(query=query)

    def encode_response(self, output, **kwargs):
        """Wrap the routing decision in the JSON response envelope."""
        return {'response': output}


if __name__ == '__main__':
    api = SemanticRoutingAPI()
    # Defaults mirror the template .env (port 8000, 4 workers) so the server
    # still boots when the variables are unset instead of raising TypeError
    # from int(None).
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 4)))
    # LitServer expects an int port; os.environ values are strings, so cast.
    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))
17 changes: 17 additions & 0 deletions bootstraprag/templates/qdrant/semantic_routing/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# Call the semantic-routing server started by api_server.py: it serves at
# /api/v1/chat-completion and decode_request() reads the 'question' key,
# so the previous /predict + {"input": 4.0} request could never reach it.
response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"question": "what is the Weather today?"},
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
12 changes: 11 additions & 1 deletion bootstraprag/templates/qdrant/semantic_routing/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,14 @@ Semantic Router is a superfast decision-making layer for your LLMs and agents. R

### How to execute code
1. `pip install -r requirements.txt`
2. `python main.py`
2. `python main.py`

### Expose Semantic Router as API
- `python api_server.py`
```text
API: http://localhost:8000/api/v1/chat-completion
Method: POST
payload: {
"question": "what is the Weather today?"
}
```
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='bootstrap-rag',
version='0.0.13',
version='0.0.14',
long_description=long_description,
long_description_content_type="text/markdown",
packages=find_packages(),
Expand Down