10 changes: 10 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/.env
@@ -0,0 +1,10 @@
COLLECTION_NAME="summaries"
VECTOR_NAME="content"
LLM_MODEL="llama3.1:latest"
OLLAMA_BASE_URL="http://localhost:11434"
EMBEDDING_MODEL="snowflake-arctic-embed:33m"
QDRANT_URL="http://localhost:6333"
QDRANT_API_KEY="th3s3cr3tk3y"

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=2
24 changes: 24 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/Dockerfile
@@ -0,0 +1,24 @@
# Use the official Python image from the Docker Hub
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file to the container
COPY requirements.txt .

# Install the required dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Set environment variables (you can replace these with values from your .env file or other configs)
ENV QDRANT_URL='http://host.docker.internal:6333' \
OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
EXPOSE 8000

# Command to run your application
CMD ["python", "api_server.py"]
bootstraprag/templates/langchain/rag_with_hyde/__init__.py
Empty file.
64 changes: 64 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/api_server.py
@@ -0,0 +1,64 @@
from abc import ABC
from dotenv import load_dotenv, find_dotenv

from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class SimpleRAGServingAPI(ls.LitAPI, ABC):
def __init__(self):
self.advanced_rag: RetrievalAugmentationGenerationUsingHyDE = None
self.FILE_PATH = 'data/mlops.pdf'
self.COLLECTION_NAME = os.environ.get('COLLECTION_NAME')
self.VECTOR_NAME = os.environ.get('VECTOR_NAME')
self.PROMPT_TEMPLATE = """Answer the question based only on the following context:
{context}

Question: {question}
"""
self.LLM_MODEL = os.environ.get('LLM_MODEL')
self.EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL')
self.QDRANT_URL = os.environ.get('QDRANT_URL')
self.QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY')
self.OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL')

def setup(self, devices):
self.advanced_rag = RetrievalAugmentationGenerationUsingHyDE(
file_path=self.FILE_PATH,
collection_name=self.COLLECTION_NAME,
vector_name=self.VECTOR_NAME,
prompt_template=self.PROMPT_TEMPLATE,
llm_model=self.LLM_MODEL,
embedding_model=self.EMBEDDING_MODEL,
qdrant_url=self.QDRANT_URL,
qdrant_api_key=self.QDRANT_API_KEY,
base_url=self.OLLAMA_BASE_URL
)
# Load Documents
documents = self.advanced_rag.load_documents()

# Get Embeddings
embeddings = self.advanced_rag.get_embeddings()

# Setup Qdrant and Add Documents
self.advanced_rag.setup_qdrant_collection(embeddings=embeddings, documents=documents)

def decode_request(self, request, **kwargs):
return request["query"]

def predict(self, query: str):
return self.advanced_rag.execute_pipeline(user_query=query)

def encode_response(self, output, **kwargs):
return {'response': output}


if __name__ == '__main__':
api = SimpleRAGServingAPI()
server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT')))
17 changes: 17 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/client.py
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion", json={"query": "what are the system and operational challenges of mlops?"})
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
82 changes: 82 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/core_advanced_rag.py
@@ -0,0 +1,82 @@
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain_qdrant import QdrantVectorStore
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from qdrant_client.http.models import Distance, VectorParams
import qdrant_client


class RetrievalAugmentationGenerationUsingHyDE:
def __init__(self, file_path, collection_name, vector_name, prompt_template, llm_model, embedding_model, qdrant_url,
qdrant_api_key, base_url):
self.file_path = file_path
self.collection_name = collection_name
self.vector_name = vector_name
self.prompt_template = prompt_template
self.qdrant_url = qdrant_url
self.qdrant_api_key = qdrant_api_key
self.llm = ChatOllama(model=llm_model, temperature=0.2, base_url=base_url)
        self.base_embeddings = OllamaEmbeddings(model=embedding_model, base_url=base_url)
self.client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
self.vector_store = None

def load_documents(self):
loader = PyMuPDFLoader(file_path=self.file_path)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=200, chunk_overlap=30
)
return loader.load_and_split(text_splitter=text_splitter)

    def get_embeddings(self):
        # HyDE: the LLM first drafts a hypothetical answer to the question (built-in
        # "web_search" prompt), and that draft is embedded for retrieval instead of the raw query.
        return HypotheticalDocumentEmbedder.from_llm(
            llm=self.llm,
            base_embeddings=self.base_embeddings,
            prompt_key="web_search"
        )

def setup_qdrant_collection(self, embeddings, documents):
if not self.client.collection_exists(collection_name=self.collection_name):
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config={
                    # size=384 matches snowflake-arctic-embed:33m; adjust if EMBEDDING_MODEL changes
                    self.vector_name: VectorParams(size=384, distance=Distance.COSINE)
                }
            )
self.vector_store = QdrantVectorStore(
client=self.client,
collection_name=self.collection_name,
embedding=embeddings,
vector_name=self.vector_name
)
self.vector_store.add_documents(documents=documents)
else:
self.vector_store = QdrantVectorStore.from_existing_collection(
collection_name=self.collection_name,
url=self.qdrant_url,
api_key=self.qdrant_api_key,
vector_name=self.vector_name,
embedding=embeddings
)

def execute_pipeline(self, user_query):
retriever = self.vector_store.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={'score_threshold': 0.8}
)
prompt = ChatPromptTemplate.from_template(self.prompt_template)
chain = (
{"context": retriever, "question": RunnablePassthrough()}
| prompt
| self.llm
| StrOutputParser()
)
return chain.invoke(user_query)



bootstraprag/templates/langchain/rag_with_hyde/data/mlops.pdf
Binary file not shown.
49 changes: 49 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/main.py
@@ -0,0 +1,49 @@
import os

from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

if __name__ == "__main__":
# Configurations
FILE_PATH = 'data/mlops.pdf'
COLLECTION_NAME = os.environ.get('COLLECTION_NAME')
VECTOR_NAME = os.environ.get('VECTOR_NAME')
PROMPT_TEMPLATE = """Answer the question based only on the following context:
{context}

Question: {question}
"""
LLM_MODEL = os.environ.get('LLM_MODEL')
EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL')
QDRANT_URL = os.environ.get('QDRANT_URL')
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY')
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL')

# Initialize QnA Pipeline Handler
pipeline_handler = RetrievalAugmentationGenerationUsingHyDE(
file_path=FILE_PATH,
collection_name=COLLECTION_NAME,
vector_name=VECTOR_NAME,
prompt_template=PROMPT_TEMPLATE,
llm_model=LLM_MODEL,
embedding_model=EMBEDDING_MODEL,
qdrant_url=QDRANT_URL,
qdrant_api_key=QDRANT_API_KEY,
base_url=OLLAMA_BASE_URL
)

# Load Documents
documents = pipeline_handler.load_documents()

# Get Embeddings
embeddings = pipeline_handler.get_embeddings()

# Setup Qdrant and Add Documents
pipeline_handler.setup_qdrant_collection(embeddings=embeddings, documents=documents)

# Execute Retrieval Pipeline
question = "what are the system and operational challenges of mlops?"
output = pipeline_handler.execute_pipeline(user_query=question)
print(output)
74 changes: 74 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/readme.md
@@ -0,0 +1,74 @@
# Advanced RAG with HyDE Project

This project implements an advanced RAG question-answering system with HyDE (Hypothetical Document Embeddings), built with LangChain, Ollama, and Qdrant.
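
HyDE asks the LLM to draft a hypothetical answer to the question and embeds that draft for similarity search, rather than embedding the raw question. Below is a minimal sketch of that flow as wired up in `core_advanced_rag.py`; the model names and base URL are the defaults from `.env`, not requirements.

```python
# Minimal HyDE sketch, mirroring core_advanced_rag.get_embeddings().
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain_ollama import ChatOllama, OllamaEmbeddings

llm = ChatOllama(model="llama3.1:latest", base_url="http://localhost:11434")
base_embeddings = OllamaEmbeddings(model="snowflake-arctic-embed:33m")

# The LLM drafts a hypothetical answer (built-in "web_search" prompt); the draft is embedded.
hyde = HypotheticalDocumentEmbedder.from_llm(
    llm=llm, base_embeddings=base_embeddings, prompt_key="web_search"
)
vector = hyde.embed_query("what are the operational challenges of mlops?")
print(len(vector))  # embedding dimension; the Qdrant collection in this template expects 384
```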

## Prerequisites

- Python 3.8 or higher
- Ollama running locally (for LLM)
- Qdrant running locally (for vector storage)

## Project structure
```tree
.
├── Dockerfile
├── __init__.py
├── api_server.py
├── client.py
├── core_advanced_rag.py
├── data
│   └── mlops.pdf
├── main.py
├── readme.md
└── requirements.txt
```

## Installation

1. `pip install bootstrap-rag`

### Setting up Ollama and Qdrant
Method 1:
1. Navigate to `root_folder/setups`.
2. Run `docker-compose-dev.yml` (for example, `docker compose -f docker-compose-dev.yml up -d`).
3. Run the `pull_model` script appropriate for your operating system.

Method 2:
1. Install and run Ollama:
- Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
- Make sure Ollama is running and accessible at `http://localhost:11434`.

2. Install and run Qdrant:
- Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
- Make sure Qdrant is running and accessible at `http://localhost:6333`.
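
Optionally, you can sanity-check both services from Python before indexing anything; the endpoints, API key, and model name below are the defaults from `.env`, so adjust them to your setup.

```python
# Quick connectivity check for Qdrant and Ollama (values are the .env defaults).
from qdrant_client import QdrantClient
from langchain_ollama import OllamaEmbeddings

qdrant = QdrantClient(url="http://localhost:6333", api_key="th3s3cr3tk3y")
print(qdrant.get_collections())  # should return without raising

embeddings = OllamaEmbeddings(model="snowflake-arctic-embed:33m", base_url="http://localhost:11434")
print(len(embeddings.embed_query("ping")))  # 384, matching the collection's vector size
```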

## How to Run
1. Create a virtual environment (optional but recommended):
```
python -m venv venv
source venv/bin/activate # On Windows, use `venv\Scripts\activate`
```
2. Run `bootstraprag create <your_poc_project_name>`

3. Install the required dependencies:
```
pip install -r requirements.txt
```

### Usage

1. Prepare your MLOps PDF document and place it in the `data` directory.

2. Update the `.env` file with your specific configuration:
   - Update `FILE_PATH` in `main.py` / `api_server.py` to point to your PDF document.
   - Update `COLLECTION_NAME` if you want to use a different name for your Qdrant collection.
   - Update `QDRANT_URL` and `QDRANT_API_KEY` if your Qdrant setup is different.

3. Run the script:
```
python main.py
```
or
```
python api_server.py
```
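
Once `api_server.py` is running, you can query the LitServe endpoint it exposes, similar to what `client.py` does. The path and payload shape follow `SimpleRAGServingAPI` in `api_server.py`; the host and port assume the `.env` defaults.

```python
# Example request against the running API server (assumes the default host/port from .env).
import requests

response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"query": "what are the system and operational challenges of mlops?"},
)
print(response.status_code)
print(response.json()["response"])
```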
8 changes: 8 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/requirements.txt
@@ -0,0 +1,8 @@
langchain==0.3.3
langchain-ollama==0.2.0
langchain-community==0.3.2
langchain-qdrant==0.1.4
unstructured[all-docs]==0.15.14
pydantic==2.9.2
qdrant-client==1.12.0
litserve==0.2.2
@@ -0,0 +1,10 @@
COLLECTION_NAME="summaries"
VECTOR_NAME="content"
LLM_MODEL="llama3.1:latest"
OLLAMA_BASE_URL="http://localhost:11434"
EMBEDDING_MODEL="snowflake-arctic-embed:33m"
QDRANT_URL="http://localhost:6333"
QDRANT_API_KEY="th3s3cr3tk3y"

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=2
@@ -0,0 +1,24 @@
# Use the official Python image from the Docker Hub
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file to the container
COPY requirements.txt .

# Install the required dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Set environment variables (you can replace these with values from your .env file or other configs)
ENV QDRANT_URL='http://host.docker.internal:6333' \
OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
EXPOSE 8000

# Command to run your application
CMD ["python", "api_server.py"]