From a79fcc0e97037850eb144888739b020ba8a36a58 Mon Sep 17 00:00:00 2001
From: pavanmantha
Date: Sun, 10 Nov 2024 21:01:29 +0530
Subject: [PATCH 1/2] -added api and docker capability of semantic cache and
 semantic router

---
 .../templates/qdrant/semantic_cache/.env      |  7 ++-
 .../qdrant/semantic_cache/api_server.py       | 23 ++++++---
 .../templates/qdrant/semantic_cache/client.py | 17 +++++++
 .../templates/qdrant/semantic_cache/readme.md | 14 +++++-
 .../qdrant/semantic_cache/semantic_cache.py   | 10 ++--
 .../templates/qdrant/semantic_routing/.env    |  5 +-
 .../qdrant/semantic_routing/api_server.py     | 50 ++++++++++++++++---
 .../qdrant/semantic_routing/client.py         | 17 +++++++
 .../qdrant/semantic_routing/readme.md         | 12 ++++-
 9 files changed, 134 insertions(+), 21 deletions(-)
 create mode 100644 bootstraprag/templates/qdrant/semantic_cache/client.py
 create mode 100644 bootstraprag/templates/qdrant/semantic_routing/client.py

diff --git a/bootstraprag/templates/qdrant/semantic_cache/.env b/bootstraprag/templates/qdrant/semantic_cache/.env
index 8aafba2..13c8a64 100644
--- a/bootstraprag/templates/qdrant/semantic_cache/.env
+++ b/bootstraprag/templates/qdrant/semantic_cache/.env
@@ -2,4 +2,9 @@
 QDRANT_URL='http://localhost:6333'
 QDRANT_API_KEY='th3s3cr3tk3y'
 OLLAMA_MODEL='llama3.2:latest'
-OLLAMA_BASE_URL='http://localhost:11434'
\ No newline at end of file
+OLLAMA_BASE_URL='http://localhost:11434'
+
+model_name_or_path='all-MiniLM-L6-v2'
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=4
\ No newline at end of file

diff --git a/bootstraprag/templates/qdrant/semantic_cache/api_server.py b/bootstraprag/templates/qdrant/semantic_cache/api_server.py
index e69bad6..f8001d4 100644
--- a/bootstraprag/templates/qdrant/semantic_cache/api_server.py
+++ b/bootstraprag/templates/qdrant/semantic_cache/api_server.py
@@ -1,19 +1,30 @@
 from abc import ABC
+from semantic_cache import SemanticCache, compute_response
 import litserve as ls
+from dotenv import load_dotenv, find_dotenv
+import os
 
 
 class SemanticCacheAPI(ls.LitAPI, ABC):
     def __init__(self):
-        pass
+        load_dotenv(find_dotenv())
+        self.semantic_cache: SemanticCache = None
 
     def setup(self, device):
-        pass
+        self.semantic_cache = SemanticCache()
 
     def decode_request(self, request, **kwargs):
-        pass
+        return request['question']
 
-    def predict(self, x, **kwargs):
-        pass
+    def predict(self, query, **kwargs):
+        return self.semantic_cache.get_response(query=query, compute_response_func=compute_response)
 
     def encode_response(self, output, **kwargs):
-        pass
+        return {"response": output}
+
+
+if __name__ == '__main__':
+    api = SemanticCacheAPI()
+    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
+                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 4)))
+    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))
\ No newline at end of file

diff --git a/bootstraprag/templates/qdrant/semantic_cache/client.py b/bootstraprag/templates/qdrant/semantic_cache/client.py
new file mode 100644
index 0000000..e396db2
--- /dev/null
+++ b/bootstraprag/templates/qdrant/semantic_cache/client.py
@@ -0,0 +1,17 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import requests
+
+response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion", json={"question": "what is the capital of India?"})
+print(f"Status: {response.status_code}\nResponse:\n {response.text}")

diff --git a/bootstraprag/templates/qdrant/semantic_cache/readme.md b/bootstraprag/templates/qdrant/semantic_cache/readme.md
index 3e7afa1..194cef3 100644
--- a/bootstraprag/templates/qdrant/semantic_cache/readme.md
+++ b/bootstraprag/templates/qdrant/semantic_cache/readme.md
@@ -1,4 +1,16 @@
 ## Qdrant Semantic Cache
+Semantic Cache is a superfast caching layer that matches queries by contextual meaning, so an LLM can serve the same response to semantically similar questions without recomputing it.
+
+### How to run
 - `pip install -r requirements.txt`
-- `python semantic_cache.py`
\ No newline at end of file
+- `python semantic_cache.py`
+
+### Expose Semantic Cache as API
+- `python api_server.py`
+```text
+API: http://localhost:8000/api/v1/chat-completion
+Method: POST
+payload: {
+  "question": "what is the capital of India?"
+}
+```
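With `api_server.py` from this patch running locally, a quick smoke test of the new endpoint could look like the sketch below. The URL and payload mirror the readme above; the timing comparison is illustrative and assumes the paraphrased second query lands within the cache's similarity threshold.

```python
import time

import requests

URL = "http://127.0.0.1:8000/api/v1/chat-completion"

# Two semantically similar questions: the first should be computed,
# the second should be served from the Qdrant-backed cache.
for question in ("What is the capital of India?",
                 "Tell me the capital city of India"):
    start = time.perf_counter()
    r = requests.post(URL, json={"question": question})
    elapsed = time.perf_counter() - start
    print(f"{elapsed:.2f}s {r.status_code} {r.json()['response']}")
```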
diff --git a/bootstraprag/templates/qdrant/semantic_cache/semantic_cache.py b/bootstraprag/templates/qdrant/semantic_cache/semantic_cache.py
index 8da1a85..07f083d 100644
--- a/bootstraprag/templates/qdrant/semantic_cache/semantic_cache.py
+++ b/bootstraprag/templates/qdrant/semantic_cache/semantic_cache.py
@@ -13,7 +13,7 @@ def __init__(self, threshold=0.35):
         # load the data from env
         load_dotenv(find_dotenv())
-        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
+        self.encoder = SentenceTransformer(model_name_or_path=os.environ.get('model_name_or_path'))
         self.cache_client = QdrantClient(url=os.environ.get('QDRANT_URL'), api_key=os.environ.get('QDRANT_API_KEY'))
         self.cache_collection_name = "cache"
         self.threshold = threshold
@@ -78,7 +78,7 @@ def compute_response(query: str):
     return f"Computed response for: {query} is {assistant_message}"
 
 
-semantic_cache = SemanticCache(threshold=0.8)
-query = "What is the capital of France?"
-response = semantic_cache.get_response(query, compute_response)
-print(response)
+# semantic_cache = SemanticCache(threshold=0.8)
+# query = "What is the capital of France?"
+# response = semantic_cache.get_response(query, compute_response)
+# print(response)
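For intuition about the `threshold` parameter above, the cache-hit decision boils down to a cosine-similarity test like the minimal sketch below. The model name comes from the `.env` file and the 0.8 cutoff mirrors the commented-out demo; the actual lookup in `semantic_cache.py` runs through Qdrant rather than computing similarity in-process.

```python
from sentence_transformers import SentenceTransformer, util

encoder = SentenceTransformer("all-MiniLM-L6-v2")

# Embedding of a previously cached question vs. an incoming paraphrase.
cached = encoder.encode("What is the capital of France?")
incoming = encoder.encode("Tell me France's capital city")

score = util.cos_sim(cached, incoming).item()  # cosine similarity
print(f"similarity={score:.2f}, cache hit={score >= 0.8}")
```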
diff --git a/bootstraprag/templates/qdrant/semantic_routing/.env b/bootstraprag/templates/qdrant/semantic_routing/.env
index 4dcbfc2..7c876d0 100644
--- a/bootstraprag/templates/qdrant/semantic_routing/.env
+++ b/bootstraprag/templates/qdrant/semantic_routing/.env
@@ -1,3 +1,6 @@
 encoder_model='sentence-transformers/all-MiniLM-L6-v2'
 qdrant_api_key='th3s3cr3tk3y'
-qdrant_url='http://localhost:6333/'
\ No newline at end of file
+qdrant_url='http://localhost:6333/'
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=4
\ No newline at end of file

diff --git a/bootstraprag/templates/qdrant/semantic_routing/api_server.py b/bootstraprag/templates/qdrant/semantic_routing/api_server.py
index a3529f3..74a1776 100644
--- a/bootstraprag/templates/qdrant/semantic_routing/api_server.py
+++ b/bootstraprag/templates/qdrant/semantic_routing/api_server.py
@@ -1,19 +1,57 @@
 from abc import ABC
+
+from semantic_router import Route
+
+from semantic_routing_core import SemanticRouter
 import litserve as ls
+import os
 
 
 class SemanticRoutingAPI(ls.LitAPI, ABC):
     def __init__(self):
-        pass
+        self.semantic_routing_core = None
+        # Define routes
+        politics = Route(
+            name="politics",
+            utterances=[
+                "isn't politics the best thing ever",
+                "why don't you tell me about your political opinions",
+                "don't you just love the president",
+                "they're going to destroy this country!",
+                "they will save the country!",
+            ],
+        )
+
+        chitchat = Route(
+            name="chitchat",
+            utterances=[
+                "how's the weather today?",
+                "how are things going?",
+                "lovely weather today",
+                "the weather is horrendous",
+                "let's go to the chippy",
+            ],
+        )
+
+        self.routes = [politics, chitchat]
 
     def setup(self, device):
-        pass
+        self.semantic_routing_core = SemanticRouter()
+        # Set up routes
+        self.semantic_routing_core.setup_routes(self.routes)
 
     def decode_request(self, request, **kwargs):
-        pass
+        return request['question']
 
-    def predict(self, x, **kwargs):
-        pass
+    def predict(self, query, **kwargs):
+        return self.semantic_routing_core.route_query(query=query)
 
     def encode_response(self, output, **kwargs):
-        pass
+        return {'response': output}
+
+
+if __name__ == '__main__':
+    api = SemanticRoutingAPI()
+    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
+                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', 4)))
+    server.run(port=int(os.environ.get('LIT_SERVER_PORT', 8000)))

diff --git a/bootstraprag/templates/qdrant/semantic_routing/client.py b/bootstraprag/templates/qdrant/semantic_routing/client.py
new file mode 100644
index 0000000..e396db2
--- /dev/null
+++ b/bootstraprag/templates/qdrant/semantic_routing/client.py
@@ -0,0 +1,17 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import requests
+
+response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion", json={"question": "what is the weather today?"})
+print(f"Status: {response.status_code}\nResponse:\n {response.text}")
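With the routing server running, its behaviour can be probed with utterances near each route, as in the sketch below. What exactly comes back in `response` depends on `SemanticRouter.route_query` in `semantic_routing_core.py`, which this patch does not show.

```python
import requests

URL = "http://127.0.0.1:8000/api/v1/chat-completion"

for question in ("how's the weather today?",           # near 'chitchat'
                 "don't you just love the president",  # near 'politics'
                 "what is the capital of India?"):     # matches no route
    r = requests.post(URL, json={"question": question})
    print(f"{question!r} -> {r.json()['response']}")
```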
diff --git a/bootstraprag/templates/qdrant/semantic_routing/readme.md b/bootstraprag/templates/qdrant/semantic_routing/readme.md
index b19049a..dcadcda 100644
--- a/bootstraprag/templates/qdrant/semantic_routing/readme.md
+++ b/bootstraprag/templates/qdrant/semantic_routing/readme.md
@@ -3,4 +3,14 @@
 Semantic Router is a superfast decision-making layer for your LLMs and agents. Rather than waiting for slow LLM generations to make tool-use decisions, it uses semantic vector space to make those decisions, routing requests by their semantic meaning.
 ### How to execute code
 1. `pip install -r requirements.txt`
-2. `python main.py`
\ No newline at end of file
+2. `python main.py`
+
+### Expose Semantic Router as API
+- `python api_server.py`
+```text
+API: http://localhost:8000/api/v1/chat-completion
+Method: POST
+payload: {
+  "question": "what is the weather today?"
+}
+```
\ No newline at end of file

From 7b2033a4851c533fd84716b117c9e22beeb9ff05 Mon Sep 17 00:00:00 2001
From: pavanmantha
Date: Sun, 10 Nov 2024 21:06:48 +0530
Subject: [PATCH 2/2] -enhanced cli options and -incremented setup

---
 bootstraprag/cli.py | 18 +++++++++++++++---
 setup.py            |  2 +-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/bootstraprag/cli.py b/bootstraprag/cli.py
index 57f9893..ba5afe6 100644
--- a/bootstraprag/cli.py
+++ b/bootstraprag/cli.py
@@ -50,11 +50,23 @@ def create(project_name, framework, template, observability):
         ]
     elif framework == 'standalone-qdrant':
         framework = 'qdrant'
-        template_choices = ['simple-search', 'multimodal-search', 'hybrid-search', 'hybrid-search-advanced',
-                            'retrieval-quality', 'semantic-cache']
+        template_choices = [
+            'simple-search',
+            'multimodal-search',
+            'hybrid-search',
+            'hybrid-search-advanced',
+            'retrieval-quality',
+            'semantic-cache',
+            'semantic-routing'
+        ]
     elif framework == 'standalone-evaluations':
         framework = 'evaluations'
-        template_choices = ['deep-evals', 'mlflow-evals', 'phoenix-evals', 'ragas-evals']
+        template_choices = [
+            'deep-evals',
+            'mlflow-evals',
+            'phoenix-evals',
+            'ragas-evals'
+        ]
     # Use InquirerPy to select template with arrow keys
     template = inquirer.select(
         message="Which template would you like to use?",

diff --git a/setup.py b/setup.py
index dc1e2ff..ee4c299 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 setup(
     name='bootstrap-rag',
-    version='0.0.13',
+    version='0.0.14',
     long_description=long_description,
     long_description_content_type="text/markdown",
     packages=find_packages(),
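For reference, the selection flow these reformatted lists feed into is the InquirerPy arrow-key prompt already used in `bootstraprag/cli.py`. In isolation it behaves like this minimal sketch, with choices mirroring the `standalone-qdrant` branch above:

```python
from InquirerPy import inquirer

template_choices = [
    'simple-search',
    'multimodal-search',
    'hybrid-search',
    'hybrid-search-advanced',
    'retrieval-quality',
    'semantic-cache',
    'semantic-routing'
]

# Arrow-key selection, as in the create() command of bootstraprag/cli.py.
template = inquirer.select(
    message="Which template would you like to use?",
    choices=template_choices,
).execute()
print(f"Selected template: {template}")
```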