pavanjava · pavanjava · Oct 10, 2024 · Oct 10, 2024 · Oct 10, 2024
diff --git a/bootstraprag/cli.py b/bootstraprag/cli.py
@@ -45,6 +45,7 @@ def create(project_name, framework, template, observability):
             'simple-rag'
         ]
     elif framework == 'standalone-qdrant':
+        framework = 'qdrant'
         template_choices = ['simple-search', 'multimodal-search', 'hybrid-search', 'hybrid-search-advanced',
                             'retrieval-quality']
     # Use InquirerPy to select template with arrow keys

diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/.env b/bootstraprag/templates/langchain/simple_rag_with_observability/.env
@@ -0,0 +1,10 @@
+OLLAMA_BASE_URL="http://localhost:11434"
+OLLAMA_LLM_MODEL="llama3.1"
+EMBEDDING_MODEL="snowflake/snowflake-arctic-embed-s"
+
+QDRANT_DB_URL="http://localhost:6333/"
+QDRANT_DB_KEY="th3s3cr3tk3y"
+COLLECTION_NAME="test_langchain_collection"
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=2
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/Dockerfile b/bootstraprag/templates/langchain/simple_rag_with_observability/Dockerfile
@@ -0,0 +1,24 @@
+# Use the official Python image from the Docker Hub
+FROM python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file to the container
+# (copied on its own, before the rest of the sources, so the install layer below can be cached)
+COPY requirements.txt .
+
+# Install the required dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the current directory contents into the container at /app
+COPY . .
+
+# Point the app at Qdrant and Ollama running on the Docker host rather than inside this container.
+# NOTE(review): these are presumably meant to take precedence over the copied .env file
+# (python-dotenv does not override already-set variables by default) - confirm.
+# NOTE(review): on Linux, host.docker.internal may need
+# `--add-host=host.docker.internal:host-gateway` at `docker run` time - confirm for your setup.
+ENV QDRANT_DB_URL='http://host.docker.internal:6333' \
+    OLLAMA_BASE_URL='http://host.docker.internal:11434'
+
+# Expose port 8000 for external access (matches LIT_SERVER_PORT in the template .env)
+EXPOSE 8000
+
+# Command to run your application
+CMD ["python", "api_server.py"]
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/__init__.py b/bootstraprag/templates/langchain/simple_rag_with_observability/__init__.py
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/api_server.py b/bootstraprag/templates/langchain/simple_rag_with_observability/api_server.py
@@ -0,0 +1,36 @@
+from abc import ABC
+from dotenv import load_dotenv, find_dotenv
+from simple_rag import SimpleRAG
+import litserve as ls
+import os
+
+_ = load_dotenv(find_dotenv())
+
+
+class SimpleRAGServingAPI(ls.LitAPI, ABC):
+    def __init__(self):
+        self.simpleRAG: SimpleRAG = None
+        self.file_path: str = "data/mlops.pdf"
+        self.collection_name: str = os.environ.get("COLLECTION_NAME", 'test_collection')
+        self.qdrant_url: str = os.environ.get("QDRANT_DB_URL", 'http://localhost:6333')
+        self.qdrant_api_key: str = os.environ.get("QDRANT_DB_KEY", 'your_api_key_here')
+
+    def setup(self, devices):
+        self.simpleRAG = SimpleRAG(file_path=self.file_path, collection_name=self.collection_name,
+                                   qdrant_url=self.qdrant_url, qdrant_api_key=self.qdrant_api_key)
+
+    def decode_request(self, request, **kwargs):
+        return request["query"]
+
+    def predict(self, query: str):
+        return self.simpleRAG.query(user_query=query)
+
+    def encode_response(self, output, **kwargs):
+        return {'response': output}
+
+
+if __name__ == '__main__':
+    api = SimpleRAGServingAPI()
+    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
+                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
+    server.run(port=os.environ.get('LIT_SERVER_PORT'))
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/client.py b/bootstraprag/templates/langchain/simple_rag_with_observability/client.py
@@ -0,0 +1,17 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import requests
+
+response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0})
+print(f"Status: {response.status_code}\nResponse:\n {response.text}")
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/custom_templates.py b/bootstraprag/templates/langchain/simple_rag_with_observability/custom_templates.py
@@ -0,0 +1,10 @@
+# Prompt for the question-answering chain built in simple_rag.py.
+# {input} receives the user's question (the retrieval chain is invoked with an "input" key).
+# {context} presumably receives the retrieved document text from the stuff-documents chain -
+# verify against the LangChain documentation.
+chat_prompt_template = """
+        You are an assistant for question-answering tasks. 
+        Use the following pieces of retrieved context to answer the question. 
+        If you don't know the answer, just say that you don't know. 

+        Question: {input}
+        Context: {context}

+        Answer:
+        """
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/data/mlops.pdf b/bootstraprag/templates/langchain/simple_rag_with_observability/data/mlops.pdf
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/main.py b/bootstraprag/templates/langchain/simple_rag_with_observability/main.py
@@ -0,0 +1,29 @@
+import os
+
+from simple_rag import SimpleRAG
+from dotenv import load_dotenv, find_dotenv
+
+load_dotenv(find_dotenv())
+
+simpleRag = SimpleRAG(
+    file_path='data/mlops.pdf',
+    collection_name=os.environ.get("COLLECTION_NAME"),
+    qdrant_url=os.environ.get("QDRANT_DB_URL"),
+    qdrant_api_key=os.environ.get("QDRANT_DB_KEY")
+)
+
+'''Uncomment the following line to insert data (only needed once) explicitly,
+else the data is inserted on the initialization'''
+# simpleRag.insert_data_with_metadata()
+
+# Start a loop to continually get input from the user
+while True:
+    # Get a query from the user
+    user_query = input("Enter your query [type 'bye' to 'exit']: ")
+
+    # Check if the user wants to terminate the loop
+    if user_query.lower() == "bye" or user_query.lower() == "exit":
+        break
+
+    response = simpleRag.query(user_query=user_query)
+    print(f"Answer: {response}")
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/readme.md b/bootstraprag/templates/langchain/simple_rag_with_observability/readme.md
@@ -0,0 +1,75 @@
+# BasicRAG Project
+
+This project implements a basic RAG-based question-answering system using LangChain, Ollama, and Qdrant, with tracing/observability provided by Arize Phoenix.
+
+## Prerequisites
+
+- Python 3.9 or higher (the bundled Dockerfile uses Python 3.9)
+- Ollama running locally (for LLM)
+- Qdrant running locally (for vector storage)
+
+## Project structure
+```tree
+.
+├── .env
+├── Dockerfile
+├── __init__.py
+├── api_server.py
+├── client.py
+├── custom_templates.py
+├── data
+│   └── mlops.pdf
+├── main.py
+├── readme.md
+├── requirements.txt
+└── simple_rag.py
+```
+
+## Installation
+
+1. `pip install bootstrap-rag`
+
+### Setting up Ollama and Qdrant
+Method 1:
+1. Navigate to `root_folder/setups`.
+2. Start the services defined in `docker-compose-dev.yml`.
+3. Run the `pull_model` script that matches your operating system.
+
+Method 2:
+1. Install and run Ollama:
+   - Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
+   - Make sure Ollama is running and accessible at `http://localhost:11434`.
+
+2. Install and run Qdrant:
+   - Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
+   - Make sure Qdrant is running and accessible at `http://localhost:6333`.
+
+## How to Run
+1. Create a virtual environment (optional but recommended):
+   ```
+   python -m venv venv
+   source venv/bin/activate  # On Windows, use `venv\Scripts\activate`
+   ```
+2. Run `bootstraprag create <your_poc_project_name>`.
+
+3. Install the required dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+### Usage
+
+1. Prepare your PDF document and place it in the `data` directory (the template ships with `data/mlops.pdf`).
+
+2. Update the configuration:
+    - To use a different PDF, change `file_path` in `main.py` and `api_server.py`; it is set in code (`data/mlops.pdf`), not in `.env`.
+    - Set `COLLECTION_NAME` in `.env` if you want to use a different name for your Qdrant collection.
+    - Set `QDRANT_DB_URL` and `QDRANT_DB_KEY` in `.env` if your Qdrant setup is different.
+
+3. Run the script:
+   ```
+   python main.py
+   ```
+   or
+   ```
+   python api_server.py
+   ```
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/requirements.txt b/bootstraprag/templates/langchain/simple_rag_with_observability/requirements.txt
@@ -0,0 +1,14 @@
+langchain==0.3.3
+langchain-core==0.3.10
+langchain-qdrant==0.1.4
+langchain-ollama==0.2.0
+langchain-community==0.3.2
+qdrant-client==1.12.0
+fastembed==0.3.6
+PyMuPDF==1.24.11
+python-dotenv==1.0.1
+litserve==0.2.2
+pydantic==2.9.0
+arize-phoenix==5.2.2
+arize-phoenix-otel==0.5.1
+openinference-instrumentation-langchain==0.1.28
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/simple_rag.py b/bootstraprag/templates/langchain/simple_rag_with_observability/simple_rag.py
@@ -0,0 +1,110 @@
+import os
+
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_ollama import OllamaLLM
+from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains.retrieval import create_retrieval_chain
+from langchain_qdrant import QdrantVectorStore
+from langchain.prompts import ChatPromptTemplate
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from qdrant_client import QdrantClient
+from qdrant_client.http.models import Distance, VectorParams, PointStruct
+from typing import Any, List
+from uuid import uuid4
+from dotenv import load_dotenv, find_dotenv
+from custom_templates import chat_prompt_template
+from phoenix.otel import register
+from openinference.instrumentation.langchain import LangChainInstrumentor
+import phoenix as px
+
+# Observability bootstrap. This runs at import time, so every module that
+# imports simple_rag (main.py and api_server.py) triggers it.
+# Launch the Phoenix app; .view() is called on the object it returns.
+# NOTE(review): .view() looks notebook-oriented - confirm it is needed when
+# running as a plain script or server.
+px.launch_app().view()
+# Register a tracer provider for the "simple-rag" project, pointed at the
+# endpoint below, and install it as the global tracer provider.
+tracer_provider = register(
+    project_name="simple-rag",
+    endpoint="http://127.0.0.1:4317",  # change this to remote if needed
+    set_global_tracer_provider=True
+
+)
+# Instrument LangChain with the tracer provider registered above.
+# skip_dep_check=True presumably bypasses the instrumentor's dependency
+# check - verify against the OpenInference docs.
+LangChainInstrumentor().instrument(tracer_provider=tracer_provider, skip_dep_check=True)
+
+
+class SimpleRAG:
+    def __init__(self, file_path: str, collection_name: str, qdrant_url: str, qdrant_api_key: str):
+        load_dotenv(find_dotenv())
+        self.file_path = file_path
+        self.collection_name = collection_name
+        self.qdrant_url = qdrant_url
+        self.qdrant_api_key = qdrant_api_key
+
+        self.model = OllamaLLM(model=os.environ.get("OLLAMA_LLM_MODEL"), base_url=os.environ.get("OLLAMA_BASE_URL"))
+        self.embedding = FastEmbedEmbeddings(model=os.environ.get("EMBEDDING_MODEL"))
+        self.client = QdrantClient(url=self.qdrant_url, api_key=self.qdrant_api_key)
+
+        # self.documents = self.load_and_split_documents()
+        self.setup_qdrant()
+        self.vector_store = self.setup_vector_store()
+        self.retrieval_chain = self.setup_retrieval_chain()
+
+    def setup_qdrant(self):
+        if not self.client.collection_exists(collection_name=self.collection_name):
+            try:
+                self.client.create_collection(
+                    collection_name=self.collection_name,
+                    vectors_config={
+                        "content": VectorParams(size=384, distance=Distance.COSINE)
+                    }
+                )
+
+                self.insert_data_with_metadata()
+            except Exception as e:
+                print(f"Exception: {str(e)}")
+
+    def load_and_split_documents(self) -> List[Any]:
+        loader = PyMuPDFLoader(file_path=self.file_path)
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
+        return loader.load_and_split(text_splitter=text_splitter)
+
+    def insert_data_with_metadata(self):
+        documents = self.load_and_split_documents()
+        chunked_data = []
+
+        for doc in documents:
+            id = str(uuid4())
+            content = doc.page_content
+            source = doc.metadata['source']
+            page = doc.metadata['page']
+
+            content_vector = self.embedding.embed_documents([content])[0]
+            vector_dict = {"content": content_vector}
+
+            payload = {
+                "page_content": content,
+                "metadata": {
+                    "id": id,
+                    "page_content": content,
+                    "source": source,
+                    "page": page,
+                }
+            }
+
+            metadata = PointStruct(id=id, vector=vector_dict, payload=payload)
+            chunked_data.append(metadata)
+
+        self.client.upsert(
+            collection_name=self.collection_name,
+            wait=True,
+            points=chunked_data)
+
+    def setup_vector_store(self) -> QdrantVectorStore:
+        return QdrantVectorStore(client=self.client, collection_name=self.collection_name, embedding=self.embedding,
+                                 vector_name="content")
+
+    def setup_retrieval_chain(self):
+        prompt = ChatPromptTemplate.from_template(template=chat_prompt_template)
+        retriever = self.vector_store.as_retriever()
+        combine_docs_chain = create_stuff_documents_chain(self.model, prompt)
+        return create_retrieval_chain(retriever, combine_docs_chain)
+
+    def query(self, user_query: str) -> str:
+        result = self.retrieval_chain.invoke({"input": user_query})
+        return result["answer"]