diff --git a/bootstraprag/templates/langchain/rag_with_hyde/.env b/bootstraprag/templates/langchain/rag_with_hyde/.env new file mode 100644 index 0000000..8ad32b9 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde/.env @@ -0,0 +1,10 @@ +COLLECTION_NAME="summaries" +VECTOR_NAME="content" +LLM_MODEL="llama3.1:latest" +OLLAMA_BASE_URL="http://localhost:11434" +EMBEDDING_MODEL="snowflake-arctic-embed:33m" +QDRANT_URL="http://localhost:6333" +QDRANT_API_KEY="th3s3cr3tk3y" + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=2 \ No newline at end of file diff --git a/bootstraprag/templates/langchain/rag_with_hyde/Dockerfile b/bootstraprag/templates/langchain/rag_with_hyde/Dockerfile new file mode 100644 index 0000000..22aa025 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde/Dockerfile @@ -0,0 +1,24 @@ +# Use the official Python image from the Docker Hub +FROM python:3.9-slim + +# Set the working directory in the container +WORKDIR /app + +# Copy the requirements file to the container +COPY requirements.txt . + +# Install the required dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the current directory contents into the container at /app +COPY . . + +# Set environment variables (you can replace these with values from your .env file or other configs) +ENV QDRANT_URL='http://host.docker.internal:6333' \ + OLLAMA_BASE_URL='http://host.docker.internal:11434' + +# Expose port 8000 for external access +EXPOSE 8000 + +# Command to run your application +CMD ["python", "api_server.py"] diff --git a/bootstraprag/templates/langchain/rag_with_hyde/__init__.py b/bootstraprag/templates/langchain/rag_with_hyde/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/langchain/rag_with_hyde/api_server.py b/bootstraprag/templates/langchain/rag_with_hyde/api_server.py new file mode 100644 index 0000000..5173107 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde/api_server.py @@ -0,0 +1,64 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from openai import base_url + +from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class SimpleRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.advanced_rag: RetrievalAugmentationGenerationUsingHyDE = None + self.FILE_PATH = 'data/mlops.pdf' + self.COLLECTION_NAME = os.environ.get('COLLECTION_NAME') + self.VECTOR_NAME = os.environ.get('VECTOR_NAME') + self.PROMPT_TEMPLATE = """Answer the question based only on the following context: + {context} + + Question: {question} + """ + self.LLM_MODEL = os.environ.get('LLM_MODEL') + self.EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL') + self.QDRANT_URL = os.environ.get('QDRANT_URL') + self.QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY') + self.OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL') + + def setup(self, devices): + self.advanced_rag = RetrievalAugmentationGenerationUsingHyDE( + file_path=self.FILE_PATH, + collection_name=self.COLLECTION_NAME, + vector_name=self.VECTOR_NAME, + prompt_template=self.PROMPT_TEMPLATE, + llm_model=self.LLM_MODEL, + embedding_model=self.EMBEDDING_MODEL, + qdrant_url=self.QDRANT_URL, + qdrant_api_key=self.QDRANT_API_KEY, + base_url=self.OLLAMA_BASE_URL + ) + # Load Documents + documents = self.advanced_rag.load_documents() + + # Get Embeddings + embeddings = self.advanced_rag.get_embeddings() + + # Setup Qdrant and Add Documents + 
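+        # setup_qdrant_collection is idempotent: it creates and indexes the collection on the
+        # first run and simply reuses the existing collection (no re-indexing) afterwards.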
self.advanced_rag.setup_qdrant_collection(embeddings=embeddings, documents=documents) + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + return self.advanced_rag.execute_pipeline(user_query=query) + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = SimpleRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/langchain/rag_with_hyde/client.py b/bootstraprag/templates/langchain/rag_with_hyde/client.py new file mode 100644 index 0000000..e396db2 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde/client.py @@ -0,0 +1,17 @@ +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import requests + +response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0}) +print(f"Status: {response.status_code}\nResponse:\n {response.text}") diff --git a/bootstraprag/templates/langchain/rag_with_hyde/core_advanced_rag.py b/bootstraprag/templates/langchain/rag_with_hyde/core_advanced_rag.py new file mode 100644 index 0000000..5005c29 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde/core_advanced_rag.py @@ -0,0 +1,82 @@ +from langchain_core.output_parsers import StrOutputParser +from langchain.chains.hyde.base import HypotheticalDocumentEmbedder +from langchain.prompts import PromptTemplate +from langchain_qdrant import QdrantVectorStore +from langchain_community.document_loaders import PyMuPDFLoader +from langchain_ollama import OllamaEmbeddings, ChatOllama +from langchain_text_splitters import RecursiveCharacterTextSplitter +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.runnables import RunnablePassthrough +from qdrant_client.http.models import Distance, VectorParams +import qdrant_client + + +class RetrievalAugmentationGenerationUsingHyDE: + def __init__(self, file_path, collection_name, vector_name, prompt_template, llm_model, embedding_model, qdrant_url, + qdrant_api_key, base_url): + self.file_path = file_path + self.collection_name = collection_name + self.vector_name = vector_name + self.prompt_template = prompt_template + self.qdrant_url = qdrant_url + self.qdrant_api_key = qdrant_api_key + self.llm = ChatOllama(model=llm_model, temperature=0.2, base_url=base_url) + self.base_embeddings = OllamaEmbeddings(model=embedding_model) + self.client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key) + self.vector_store = None + + def load_documents(self): + loader = PyMuPDFLoader(file_path=self.file_path) + text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( + chunk_size=200, chunk_overlap=30 + ) + return loader.load_and_split(text_splitter=text_splitter) + + def get_embeddings(self): + prompt = PromptTemplate(input_variables=["question"], 
template=self.prompt_template) + llm_chain = self.llm | prompt + return HypotheticalDocumentEmbedder( + llm_chain=llm_chain, + base_embeddings=self.base_embeddings + ).from_llm(llm=self.llm, base_embeddings=self.base_embeddings, prompt_key="web_search") + + def setup_qdrant_collection(self, embeddings, documents): + if not self.client.collection_exists(collection_name=self.collection_name): + self.client.create_collection( + collection_name=self.collection_name, + vectors_config={ + "content": VectorParams(size=384, distance=Distance.COSINE) + } + ) + self.vector_store = QdrantVectorStore( + client=self.client, + collection_name=self.collection_name, + embedding=embeddings, + vector_name=self.vector_name + ) + self.vector_store.add_documents(documents=documents) + else: + self.vector_store = QdrantVectorStore.from_existing_collection( + collection_name=self.collection_name, + url=self.qdrant_url, + api_key=self.qdrant_api_key, + vector_name=self.vector_name, + embedding=embeddings + ) + + def execute_pipeline(self, user_query): + retriever = self.vector_store.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={'score_threshold': 0.8} + ) + prompt = ChatPromptTemplate.from_template(self.prompt_template) + chain = ( + {"context": retriever, "question": RunnablePassthrough()} + | prompt + | self.llm + | StrOutputParser() + ) + return chain.invoke(user_query) + + + diff --git a/bootstraprag/templates/langchain/rag_with_hyde/data/mlops.pdf b/bootstraprag/templates/langchain/rag_with_hyde/data/mlops.pdf new file mode 100644 index 0000000..c8d8170 Binary files /dev/null and b/bootstraprag/templates/langchain/rag_with_hyde/data/mlops.pdf differ diff --git a/bootstraprag/templates/langchain/rag_with_hyde/main.py b/bootstraprag/templates/langchain/rag_with_hyde/main.py new file mode 100644 index 0000000..3292a42 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde/main.py @@ -0,0 +1,49 @@ +import os + +from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE +from dotenv import load_dotenv, find_dotenv + +load_dotenv(find_dotenv()) + +if __name__ == "__main__": + # Configurations + FILE_PATH = 'data/mlops.pdf' + COLLECTION_NAME = os.environ.get('COLLECTION_NAME') + VECTOR_NAME = os.environ.get('VECTOR_NAME') + PROMPT_TEMPLATE = """Answer the question based only on the following context: + {context} + + Question: {question} + """ + LLM_MODEL = os.environ.get('LLM_MODEL') + EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL') + QDRANT_URL = os.environ.get('QDRANT_URL') + QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY') + OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL') + + # Initialize QnA Pipeline Handler + pipeline_handler = RetrievalAugmentationGenerationUsingHyDE( + file_path=FILE_PATH, + collection_name=COLLECTION_NAME, + vector_name=VECTOR_NAME, + prompt_template=PROMPT_TEMPLATE, + llm_model=LLM_MODEL, + embedding_model=EMBEDDING_MODEL, + qdrant_url=QDRANT_URL, + qdrant_api_key=QDRANT_API_KEY, + base_url=OLLAMA_BASE_URL + ) + + # Load Documents + documents = pipeline_handler.load_documents() + + # Get Embeddings + embeddings = pipeline_handler.get_embeddings() + + # Setup Qdrant and Add Documents + pipeline_handler.setup_qdrant_collection(embeddings=embeddings, documents=documents) + + # Execute Retrieval Pipeline + question = "what are the system and operational challenges of mlops?" 
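+    # execute_pipeline embeds a hypothetical answer (HyDE), retrieves chunks above the 0.8 score threshold, and answers with the Ollama LLM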
+    output = pipeline_handler.execute_pipeline(user_query=question)
+    print(output)
diff --git a/bootstraprag/templates/langchain/rag_with_hyde/readme.md b/bootstraprag/templates/langchain/rag_with_hyde/readme.md
new file mode 100644
index 0000000..5d1a2e0
--- /dev/null
+++ b/bootstraprag/templates/langchain/rag_with_hyde/readme.md
@@ -0,0 +1,74 @@
+# Advanced RAG with HyDE Project
+
+This project implements an advanced RAG question-answering system with HyDE (Hypothetical Document Embeddings), built on LangChain, Ollama, and Qdrant.
+
+## Prerequisites
+
+- Python 3.9 or higher
+- Ollama running locally (for the LLM)
+- Qdrant running locally (for vector storage)
+
+## Project structure
+```tree
+.
+├── Dockerfile
+├── __init__.py
+├── api_server.py
+├── client.py
+├── core_advanced_rag.py
+├── data
+│   └── mlops.pdf
+├── main.py
+├── readme.md
+└── requirements.txt
+```
+
+## Installation
+
+1. `pip install bootstrap-rag`
+
+### Setting up Ollama and Qdrant
+Method 1:
+1. Navigate to `root_folder/setups`.
+2. Bring up the dev stack: `docker compose -f docker-compose-dev.yml up -d`.
+3. Run the `pull_model` script for your operating system.
+
+Method 2:
+1. Install and run Ollama:
+   - Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
+   - Make sure Ollama is running and accessible at `http://localhost:11434`.
+
+2. Install and run Qdrant:
+   - Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
+   - Make sure Qdrant is running and accessible at `http://localhost:6333`.
+
+## How to Run
+1. Create a virtual environment (optional but recommended):
+   ```
+   python -m venv venv
+   source venv/bin/activate  # On Windows, use `venv\Scripts\activate`
+   ```
+2. Run `bootstraprag create `
+
+3. Install the required dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+### Usage
+
+1. Prepare your MLOps PDF document and place it in the `data` directory.
+
+2. Update the `.env` file with your specific configuration:
+   - Update `COLLECTION_NAME` if you want a different name for your Qdrant collection.
+   - Update `QDRANT_URL` and `QDRANT_API_KEY` if your Qdrant setup differs.
+   - The PDF path is set by `FILE_PATH` in `main.py` and `api_server.py`; change it there to point at your own document.
+
+3.
Run the script: + ``` + python main.py + ``` + or + ``` + python api_server.py + ``` \ No newline at end of file diff --git a/bootstraprag/templates/langchain/rag_with_hyde/requirements.txt b/bootstraprag/templates/langchain/rag_with_hyde/requirements.txt new file mode 100644 index 0000000..77da22c --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde/requirements.txt @@ -0,0 +1,8 @@ +langchain==0.3.3 +langchain-ollama==0.2.0 +langchain-community==0.3.2 +langchain-qdrant==0.1.4 +unstructured[all-docs]==0.15.14 +pydantic==2.9.2 +qdrant-client==1.12.0 +litserve==0.2.2 \ No newline at end of file diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/.env b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/.env new file mode 100644 index 0000000..8ad32b9 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/.env @@ -0,0 +1,10 @@ +COLLECTION_NAME="summaries" +VECTOR_NAME="content" +LLM_MODEL="llama3.1:latest" +OLLAMA_BASE_URL="http://localhost:11434" +EMBEDDING_MODEL="snowflake-arctic-embed:33m" +QDRANT_URL="http://localhost:6333" +QDRANT_API_KEY="th3s3cr3tk3y" + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=2 \ No newline at end of file diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/Dockerfile b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/Dockerfile new file mode 100644 index 0000000..22aa025 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/Dockerfile @@ -0,0 +1,24 @@ +# Use the official Python image from the Docker Hub +FROM python:3.9-slim + +# Set the working directory in the container +WORKDIR /app + +# Copy the requirements file to the container +COPY requirements.txt . + +# Install the required dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the current directory contents into the container at /app +COPY . . 
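+# Note: .env is copied in with the sources; since load_dotenv() does not override variables that are already set, the ENV values below take precedence inside the container.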
+ +# Set environment variables (you can replace these with values from your .env file or other configs) +ENV QDRANT_URL='http://host.docker.internal:6333' \ + OLLAMA_BASE_URL='http://host.docker.internal:11434' + +# Expose port 8000 for external access +EXPOSE 8000 + +# Command to run your application +CMD ["python", "api_server.py"] diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/__init__.py b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/api_server.py b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/api_server.py new file mode 100644 index 0000000..5173107 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/api_server.py @@ -0,0 +1,64 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from openai import base_url + +from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class SimpleRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.advanced_rag: RetrievalAugmentationGenerationUsingHyDE = None + self.FILE_PATH = 'data/mlops.pdf' + self.COLLECTION_NAME = os.environ.get('COLLECTION_NAME') + self.VECTOR_NAME = os.environ.get('VECTOR_NAME') + self.PROMPT_TEMPLATE = """Answer the question based only on the following context: + {context} + + Question: {question} + """ + self.LLM_MODEL = os.environ.get('LLM_MODEL') + self.EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL') + self.QDRANT_URL = os.environ.get('QDRANT_URL') + self.QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY') + self.OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL') + + def setup(self, devices): + self.advanced_rag = RetrievalAugmentationGenerationUsingHyDE( + file_path=self.FILE_PATH, + collection_name=self.COLLECTION_NAME, + vector_name=self.VECTOR_NAME, + prompt_template=self.PROMPT_TEMPLATE, + llm_model=self.LLM_MODEL, + embedding_model=self.EMBEDDING_MODEL, + qdrant_url=self.QDRANT_URL, + qdrant_api_key=self.QDRANT_API_KEY, + base_url=self.OLLAMA_BASE_URL + ) + # Load Documents + documents = self.advanced_rag.load_documents() + + # Get Embeddings + embeddings = self.advanced_rag.get_embeddings() + + # Setup Qdrant and Add Documents + self.advanced_rag.setup_qdrant_collection(embeddings=embeddings, documents=documents) + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + return self.advanced_rag.execute_pipeline(user_query=query) + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = SimpleRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/client.py b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/client.py new file mode 100644 index 0000000..e396db2 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/client.py @@ -0,0 +1,17 @@ +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import requests + +response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0}) +print(f"Status: {response.status_code}\nResponse:\n {response.text}") diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/core_advanced_rag.py b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/core_advanced_rag.py new file mode 100644 index 0000000..f6d1c75 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/core_advanced_rag.py @@ -0,0 +1,91 @@ +from langchain_core.output_parsers import StrOutputParser +from langchain.chains.hyde.base import HypotheticalDocumentEmbedder +from langchain.prompts import PromptTemplate +from langchain_qdrant import QdrantVectorStore +from langchain_community.document_loaders import PyMuPDFLoader +from langchain_ollama import OllamaEmbeddings, ChatOllama +from langchain_text_splitters import RecursiveCharacterTextSplitter +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.runnables import RunnablePassthrough +from qdrant_client.http.models import Distance, VectorParams +from phoenix.otel import register +from openinference.instrumentation.langchain import LangChainInstrumentor +import phoenix as px +import qdrant_client + +px.launch_app() +tracer_provider = register( + project_name="rag-with-hyde", + endpoint="http://127.0.0.1:4317", # change this to remote if needed + set_global_tracer_provider=True + +) +LangChainInstrumentor().instrument(tracer_provider=tracer_provider, skip_dep_check=True) + + +class RetrievalAugmentationGenerationUsingHyDE: + def __init__(self, file_path, collection_name, vector_name, prompt_template, llm_model, embedding_model, qdrant_url, + qdrant_api_key, base_url): + self.file_path = file_path + self.collection_name = collection_name + self.vector_name = vector_name + self.prompt_template = prompt_template + self.qdrant_url = qdrant_url + self.qdrant_api_key = qdrant_api_key + self.llm = ChatOllama(model=llm_model, temperature=0.2, base_url=base_url) + self.base_embeddings = OllamaEmbeddings(model=embedding_model) + self.client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key) + self.vector_store = None + + def load_documents(self): + loader = PyMuPDFLoader(file_path=self.file_path) + text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( + chunk_size=200, chunk_overlap=30 + ) + return loader.load_and_split(text_splitter=text_splitter) + + def get_embeddings(self): + prompt = PromptTemplate(input_variables=["question"], template=self.prompt_template) + llm_chain = self.llm | prompt + return HypotheticalDocumentEmbedder( + llm_chain=llm_chain, + base_embeddings=self.base_embeddings + ).from_llm(llm=self.llm, base_embeddings=self.base_embeddings, prompt_key="web_search") + + def setup_qdrant_collection(self, embeddings, documents): + if not self.client.collection_exists(collection_name=self.collection_name): + self.client.create_collection( + collection_name=self.collection_name, + vectors_config={ + "content": VectorParams(size=384, distance=Distance.COSINE) + } + ) + self.vector_store = 
QdrantVectorStore( + client=self.client, + collection_name=self.collection_name, + embedding=embeddings, + vector_name=self.vector_name + ) + self.vector_store.add_documents(documents=documents) + else: + self.vector_store = QdrantVectorStore.from_existing_collection( + collection_name=self.collection_name, + url=self.qdrant_url, + api_key=self.qdrant_api_key, + vector_name=self.vector_name, + embedding=embeddings + ) + + def execute_pipeline(self, user_query): + retriever = self.vector_store.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={'score_threshold': 0.8} + ) + prompt = ChatPromptTemplate.from_template(self.prompt_template) + chain = ( + {"context": retriever, "question": RunnablePassthrough()} + | prompt + | self.llm + | StrOutputParser() + ) + return chain.invoke(user_query) diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/data/mlops.pdf b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/data/mlops.pdf new file mode 100644 index 0000000..c8d8170 Binary files /dev/null and b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/data/mlops.pdf differ diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/main.py b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/main.py new file mode 100644 index 0000000..3292a42 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/main.py @@ -0,0 +1,49 @@ +import os + +from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE +from dotenv import load_dotenv, find_dotenv + +load_dotenv(find_dotenv()) + +if __name__ == "__main__": + # Configurations + FILE_PATH = 'data/mlops.pdf' + COLLECTION_NAME = os.environ.get('COLLECTION_NAME') + VECTOR_NAME = os.environ.get('VECTOR_NAME') + PROMPT_TEMPLATE = """Answer the question based only on the following context: + {context} + + Question: {question} + """ + LLM_MODEL = os.environ.get('LLM_MODEL') + EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL') + QDRANT_URL = os.environ.get('QDRANT_URL') + QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY') + OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL') + + # Initialize QnA Pipeline Handler + pipeline_handler = RetrievalAugmentationGenerationUsingHyDE( + file_path=FILE_PATH, + collection_name=COLLECTION_NAME, + vector_name=VECTOR_NAME, + prompt_template=PROMPT_TEMPLATE, + llm_model=LLM_MODEL, + embedding_model=EMBEDDING_MODEL, + qdrant_url=QDRANT_URL, + qdrant_api_key=QDRANT_API_KEY, + base_url=OLLAMA_BASE_URL + ) + + # Load Documents + documents = pipeline_handler.load_documents() + + # Get Embeddings + embeddings = pipeline_handler.get_embeddings() + + # Setup Qdrant and Add Documents + pipeline_handler.setup_qdrant_collection(embeddings=embeddings, documents=documents) + + # Execute Retrieval Pipeline + question = "what are the system and operational challenges of mlops?" + output = pipeline_handler.execute_pipeline(user_query=question) + print(output) diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/readme.md b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/readme.md new file mode 100644 index 0000000..9ac65e8 --- /dev/null +++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/readme.md @@ -0,0 +1,77 @@ +# Advanced RAG with HyDE Project + +This project implements a Advanced RAG with HyDE based Question-Answering system using LangChain, Ollama, and Qdrant. 
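+
+How HyDE fits in: instead of embedding the raw question, `core_advanced_rag.py` lets the LLM draft a hypothetical answer and embeds that draft for retrieval against Qdrant. A minimal, illustrative sketch of just that step (model names are the `.env` defaults and assume a locally running Ollama; this is not the full pipeline):
+
+```python
+# Sketch of the HyDE embedding step (see get_embeddings in core_advanced_rag.py).
+from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
+from langchain_ollama import ChatOllama, OllamaEmbeddings
+
+llm = ChatOllama(model="llama3.1:latest", temperature=0.2)
+base_embeddings = OllamaEmbeddings(model="snowflake-arctic-embed:33m")
+
+# The LLM writes a hypothetical answer; its embedding is what gets searched in Qdrant.
+hyde_embedder = HypotheticalDocumentEmbedder.from_llm(
+    llm=llm,
+    base_embeddings=base_embeddings,
+    prompt_key="web_search",
+)
+query_vector = hyde_embedder.embed_query(
+    "what are the system and operational challenges of mlops?"
+)
+print(len(query_vector))  # 384, matching the Qdrant collection's vector size
+```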
+
+## Prerequisites
+
+- Python 3.9 or higher
+- Ollama running locally (for the LLM)
+- Qdrant running locally (for vector storage)
+
+## Project structure
+```tree
+.
+├── Dockerfile
+├── __init__.py
+├── api_server.py
+├── client.py
+├── core_advanced_rag.py
+├── data
+│   └── mlops.pdf
+├── main.py
+├── readme.md
+└── requirements.txt
+```
+
+## Installation
+
+1. `pip install bootstrap-rag`
+
+### Setting up Ollama and Qdrant
+Method 1:
+1. Navigate to `root_folder/setups`.
+2. Bring up the dev stack: `docker compose -f docker-compose-dev.yml up -d`.
+3. Run the `pull_model` script for your operating system.
+
+Method 2:
+1. Install and run Ollama:
+   - Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
+   - Make sure Ollama is running and accessible at `http://localhost:11434`.
+
+2. Install and run Qdrant:
+   - Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
+   - Make sure Qdrant is running and accessible at `http://localhost:6333`.
+
+## How to Run
+1. Create a virtual environment (optional but recommended):
+   ```
+   python -m venv venv
+   source venv/bin/activate  # On Windows, use `venv\Scripts\activate`
+   ```
+2. Run `bootstraprag create `
+
+3. Install the required dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+### Usage
+
+1. Prepare your MLOps PDF document and place it in the `data` directory.
+
+2. Update the `.env` file with your specific configuration:
+   - Update `COLLECTION_NAME` if you want a different name for your Qdrant collection.
+   - Update `QDRANT_URL` and `QDRANT_API_KEY` if your Qdrant setup differs.
+   - The PDF path is set by `FILE_PATH` in `main.py` and `api_server.py`; change it there to point at your own document.
+
+3. Run the script:
+   ```
+   python main.py
+   ```
+   or
+   ```
+   python api_server.py
+   ```
+
+- This project is instrumented with Arize Phoenix observability; once it is running, open `http://localhost:6006` in your browser to view the traces.
\ No newline at end of file
diff --git a/bootstraprag/templates/langchain/rag_with_hyde_with_observability/requirements.txt b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/requirements.txt
new file mode 100644
index 0000000..4a17b47
--- /dev/null
+++ b/bootstraprag/templates/langchain/rag_with_hyde_with_observability/requirements.txt
@@ -0,0 +1,11 @@
+langchain==0.3.3
+langchain-ollama==0.2.0
+langchain-community==0.3.2
+langchain-qdrant==0.1.4
+unstructured[all-docs]==0.15.14
+pydantic==2.9.2
+qdrant-client==1.12.0
+litserve==0.2.2
+arize-phoenix==5.2.2
+arize-phoenix-otel==0.5.1
+openinference-instrumentation-langchain==0.1.28
\ No newline at end of file
diff --git a/bootstraprag/templates/langchain/simple_rag/requirements.txt b/bootstraprag/templates/langchain/simple_rag/requirements.txt
index 5315f33..dc81f83 100644
--- a/bootstraprag/templates/langchain/simple_rag/requirements.txt
+++ b/bootstraprag/templates/langchain/simple_rag/requirements.txt
@@ -8,4 +8,4 @@ fastembed==0.3.6
 PyMuPDF==1.24.11
 python-dotenv==1.0.1
 litserve==0.2.2
-pydantic==2.9.0
\ No newline at end of file
+pydantic==2.9.2
\ No newline at end of file
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/simple_rag.py b/bootstraprag/templates/langchain/simple_rag_with_observability/simple_rag.py
index 42c0563..da3db5f 100644
--- a/bootstraprag/templates/langchain/simple_rag_with_observability/simple_rag.py
+++ b/bootstraprag/templates/langchain/simple_rag_with_observability/simple_rag.py
@@ -18,7 +18,7 @@ from openinference.instrumentation.langchain import LangChainInstrumentor
 import phoenix as px
-px.launch_app().view() +px.launch_app() tracer_provider = register( project_name="simple-rag", endpoint="http://127.0.0.1:4317", # change this to remote if needed
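
Note on trying the servers in these templates: the bundled `client.py` still posts the stock LitServe `/predict` example payload, while `api_server.py` registers its endpoint at `/api/v1/chat-completion` and expects a `query` field. A minimal sketch of a matching request (host and port taken from the `.env` defaults; assumes `api_server.py` is already running):

```python
# Sketch only: query the HyDE RAG server started by api_server.py.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"query": "what are the system and operational challenges of mlops?"},
    timeout=120,
)
print(resp.status_code)
print(resp.json().get("response"))  # encode_response wraps the answer under "response"
```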