10 changes: 10 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/.env
@@ -0,0 +1,10 @@
COLLECTION_NAME="summaries"
VECTOR_NAME="content"
LLM_MODEL="llama3.1:latest"
OLLAMA_BASE_URL="http://localhost:11434"
EMBEDDING_MODEL="snowflake-arctic-embed:33m"
QDRANT_URL="http://localhost:6333"
QDRANT_API_KEY="th3s3cr3tk3y"

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=2
24 changes: 24 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/Dockerfile
@@ -0,0 +1,24 @@
# Use the official Python image from the Docker Hub
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file to the container
COPY requirements.txt .

# Install the required dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Set environment variables (you can replace these with values from your .env file or other configs)
ENV QDRANT_URL='http://host.docker.internal:6333' \
OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
EXPOSE 8000

# Command to run your application
CMD ["python", "api_server.py"]
bootstraprag/templates/langchain/rag_with_hyde/__init__.py
Empty file.
64 changes: 64 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/api_server.py
@@ -0,0 +1,64 @@
from abc import ABC
from dotenv import load_dotenv, find_dotenv

from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class SimpleRAGServingAPI(ls.LitAPI, ABC):
def __init__(self):
self.advanced_rag: RetrievalAugmentationGenerationUsingHyDE = None
self.FILE_PATH = 'data/mlops.pdf'
self.COLLECTION_NAME = os.environ.get('COLLECTION_NAME')
self.VECTOR_NAME = os.environ.get('VECTOR_NAME')
self.PROMPT_TEMPLATE = """Answer the question based only on the following context:
{context}

Question: {question}
"""
self.LLM_MODEL = os.environ.get('LLM_MODEL')
self.EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL')
self.QDRANT_URL = os.environ.get('QDRANT_URL')
self.QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY')
self.OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL')

def setup(self, devices):
self.advanced_rag = RetrievalAugmentationGenerationUsingHyDE(
file_path=self.FILE_PATH,
collection_name=self.COLLECTION_NAME,
vector_name=self.VECTOR_NAME,
prompt_template=self.PROMPT_TEMPLATE,
llm_model=self.LLM_MODEL,
embedding_model=self.EMBEDDING_MODEL,
qdrant_url=self.QDRANT_URL,
qdrant_api_key=self.QDRANT_API_KEY,
base_url=self.OLLAMA_BASE_URL
)
# Load Documents
documents = self.advanced_rag.load_documents()

# Get Embeddings
embeddings = self.advanced_rag.get_embeddings()

# Setup Qdrant and Add Documents
self.advanced_rag.setup_qdrant_collection(embeddings=embeddings, documents=documents)

def decode_request(self, request, **kwargs):
return request["query"]

def predict(self, query: str):
return self.advanced_rag.execute_pipeline(user_query=query)

def encode_response(self, output, **kwargs):
return {'response': output}


if __name__ == '__main__':
api = SimpleRAGServingAPI()
server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT')))
17 changes: 17 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/client.py
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion", json={"query": "what are the system and operational challenges of mlops?"})
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
82 changes: 82 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/core_advanced_rag.py
@@ -0,0 +1,82 @@
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain_qdrant import QdrantVectorStore
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from qdrant_client.http.models import Distance, VectorParams
import qdrant_client


class RetrievalAugmentationGenerationUsingHyDE:
def __init__(self, file_path, collection_name, vector_name, prompt_template, llm_model, embedding_model, qdrant_url,
qdrant_api_key, base_url):
self.file_path = file_path
self.collection_name = collection_name
self.vector_name = vector_name
self.prompt_template = prompt_template
self.qdrant_url = qdrant_url
self.qdrant_api_key = qdrant_api_key
self.llm = ChatOllama(model=llm_model, temperature=0.2, base_url=base_url)
        self.base_embeddings = OllamaEmbeddings(model=embedding_model, base_url=base_url)
self.client = qdrant_client.QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
self.vector_store = None

def load_documents(self):
loader = PyMuPDFLoader(file_path=self.file_path)
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=200, chunk_overlap=30
)
return loader.load_and_split(text_splitter=text_splitter)

    def get_embeddings(self):
        # HyDE: the LLM first drafts a hypothetical answer to the question (built-in
        # "web_search" prompt), and that draft is embedded for retrieval instead of the raw query.
        return HypotheticalDocumentEmbedder.from_llm(
            llm=self.llm,
            base_embeddings=self.base_embeddings,
            prompt_key="web_search"
        )

def setup_qdrant_collection(self, embeddings, documents):
if not self.client.collection_exists(collection_name=self.collection_name):
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config={
                    # size=384 matches snowflake-arctic-embed:33m; adjust if EMBEDDING_MODEL changes
                    self.vector_name: VectorParams(size=384, distance=Distance.COSINE)
                }
            )
self.vector_store = QdrantVectorStore(
client=self.client,
collection_name=self.collection_name,
embedding=embeddings,
vector_name=self.vector_name
)
self.vector_store.add_documents(documents=documents)
else:
self.vector_store = QdrantVectorStore.from_existing_collection(
collection_name=self.collection_name,
url=self.qdrant_url,
api_key=self.qdrant_api_key,
vector_name=self.vector_name,
embedding=embeddings
)

def execute_pipeline(self, user_query):
retriever = self.vector_store.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={'score_threshold': 0.8}
)
prompt = ChatPromptTemplate.from_template(self.prompt_template)
chain = (
{"context": retriever, "question": RunnablePassthrough()}
| prompt
| self.llm
| StrOutputParser()
)
return chain.invoke(user_query)



bootstraprag/templates/langchain/rag_with_hyde/data/mlops.pdf
Binary file not shown.
49 changes: 49 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/main.py
@@ -0,0 +1,49 @@
import os

from core_advanced_rag import RetrievalAugmentationGenerationUsingHyDE
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

if __name__ == "__main__":
# Configurations
FILE_PATH = 'data/mlops.pdf'
COLLECTION_NAME = os.environ.get('COLLECTION_NAME')
VECTOR_NAME = os.environ.get('VECTOR_NAME')
PROMPT_TEMPLATE = """Answer the question based only on the following context:
{context}

Question: {question}
"""
LLM_MODEL = os.environ.get('LLM_MODEL')
EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL')
QDRANT_URL = os.environ.get('QDRANT_URL')
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY')
OLLAMA_BASE_URL = os.environ.get('OLLAMA_BASE_URL')

# Initialize QnA Pipeline Handler
pipeline_handler = RetrievalAugmentationGenerationUsingHyDE(
file_path=FILE_PATH,
collection_name=COLLECTION_NAME,
vector_name=VECTOR_NAME,
prompt_template=PROMPT_TEMPLATE,
llm_model=LLM_MODEL,
embedding_model=EMBEDDING_MODEL,
qdrant_url=QDRANT_URL,
qdrant_api_key=QDRANT_API_KEY,
base_url=OLLAMA_BASE_URL
)

# Load Documents
documents = pipeline_handler.load_documents()

# Get Embeddings
embeddings = pipeline_handler.get_embeddings()

# Setup Qdrant and Add Documents
pipeline_handler.setup_qdrant_collection(embeddings=embeddings, documents=documents)

# Execute Retrieval Pipeline
question = "what are the system and operational challenges of mlops?"
output = pipeline_handler.execute_pipeline(user_query=question)
print(output)
74 changes: 74 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/readme.md
@@ -0,0 +1,74 @@
# Advanced RAG with HyDE Project

This project implements an advanced RAG question-answering system with HyDE (Hypothetical Document Embeddings), built with LangChain, Ollama, and Qdrant.
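
HyDE asks the LLM to draft a hypothetical answer to the question and embeds that draft for similarity search, rather than embedding the raw question. Below is a minimal sketch of that flow as wired up in `core_advanced_rag.py`; the model names and base URL are the defaults from `.env`, not requirements.

```python
# Minimal HyDE sketch, mirroring core_advanced_rag.get_embeddings().
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain_ollama import ChatOllama, OllamaEmbeddings

llm = ChatOllama(model="llama3.1:latest", base_url="http://localhost:11434")
base_embeddings = OllamaEmbeddings(model="snowflake-arctic-embed:33m")

# The LLM drafts a hypothetical answer (built-in "web_search" prompt); the draft is embedded.
hyde = HypotheticalDocumentEmbedder.from_llm(
    llm=llm, base_embeddings=base_embeddings, prompt_key="web_search"
)
vector = hyde.embed_query("what are the operational challenges of mlops?")
print(len(vector))  # embedding dimension; the Qdrant collection in this template expects 384
```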

## Prerequisites

- Python 3.8 or higher
- Ollama running locally (for LLM)
- Qdrant running locally (for vector storage)

## Project structure
```tree
.
├── Dockerfile
├── __init__.py
├── api_server.py
├── client.py
├── core_advanced_rag.py
├── data
│   └── mlops.pdf
├── main.py
├── readme.md
└── requirements.txt
```

## Installation

1. `pip install bootstrap-rag`

### Setting up Ollama and Qdrant
Method 1:
1. Navigate to `root_folder/setups`.
2. Run `docker-compose-dev.yml` (for example, `docker compose -f docker-compose-dev.yml up -d`).
3. Run the `pull_model` script appropriate for your operating system.

Method 2:
1. Install and run Ollama:
- Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
- Make sure Ollama is running and accessible at `http://localhost:11434`.

2. Install and run Qdrant:
- Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
- Make sure Qdrant is running and accessible at `http://localhost:6333`.
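
Optionally, you can sanity-check both services from Python before indexing anything; the endpoints, API key, and model name below are the defaults from `.env`, so adjust them to your setup.

```python
# Quick connectivity check for Qdrant and Ollama (values are the .env defaults).
from qdrant_client import QdrantClient
from langchain_ollama import OllamaEmbeddings

qdrant = QdrantClient(url="http://localhost:6333", api_key="th3s3cr3tk3y")
print(qdrant.get_collections())  # should return without raising

embeddings = OllamaEmbeddings(model="snowflake-arctic-embed:33m", base_url="http://localhost:11434")
print(len(embeddings.embed_query("ping")))  # 384, matching the collection's vector size
```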

## How to Run
1. Create a virtual environment (optional but recommended):
```
python -m venv venv
source venv/bin/activate # On Windows, use `venv\Scripts\activate`
```
2. Run `bootstraprag create <your_poc_project_name>`

3. Install the required dependencies:
```
pip install -r requirements.txt
```

### Usage

1. Prepare your MLOps PDF document and place it in the `data` directory.

2. Update the `.env` file with your specific configuration:
   - Update `FILE_PATH` in `main.py` / `api_server.py` to point to your PDF document.
   - Update `COLLECTION_NAME` if you want to use a different name for your Qdrant collection.
   - Update `QDRANT_URL` and `QDRANT_API_KEY` if your Qdrant setup is different.

3. Run the script:
```
python main.py
```
or
```
python api_server.py
```
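
Once `api_server.py` is running, you can query the LitServe endpoint it exposes, similar to what `client.py` does. The path and payload shape follow `SimpleRAGServingAPI` in `api_server.py`; the host and port assume the `.env` defaults.

```python
# Example request against the running API server (assumes the default host/port from .env).
import requests

response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"query": "what are the system and operational challenges of mlops?"},
)
print(response.status_code)
print(response.json()["response"])
```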
8 changes: 8 additions & 0 deletions bootstraprag/templates/langchain/rag_with_hyde/requirements.txt
@@ -0,0 +1,8 @@
langchain==0.3.3
langchain-ollama==0.2.0
langchain-community==0.3.2
langchain-qdrant==0.1.4
unstructured[all-docs]==0.15.14
pydantic==2.9.2
qdrant-client==1.12.0
litserve==0.2.2
@@ -0,0 +1,10 @@
COLLECTION_NAME="summaries"
VECTOR_NAME="content"
LLM_MODEL="llama3.1:latest"
OLLAMA_BASE_URL="http://localhost:11434"
EMBEDDING_MODEL="snowflake-arctic-embed:33m"
QDRANT_URL="http://localhost:6333"
QDRANT_API_KEY="th3s3cr3tk3y"

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=2
@@ -0,0 +1,24 @@
# Use the official Python image from the Docker Hub
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file to the container
COPY requirements.txt .

# Install the required dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Set environment variables (you can replace these with values from your .env file or other configs)
ENV QDRANT_URL='http://host.docker.internal:6333' \
OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
EXPOSE 8000

# Command to run your application
CMD ["python", "api_server.py"]