Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bootstraprag/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def create(project_name, framework, template, observability):
'simple-rag'
]
elif framework == 'standalone-qdrant':
framework = 'qdrant'
template_choices = ['simple-search', 'multimodal-search', 'hybrid-search', 'hybrid-search-advanced',
'retrieval-quality']
# Use InquirerPy to select template with arrow keys
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
OLLAMA_BASE_URL="http://localhost:11434"
OLLAMA_LLM_MODEL="llama3.1"
EMBEDDING_MODEL="snowflake/snowflake-arctic-embed-s"

QDRANT_DB_URL="http://localhost:6333/"
QDRANT_DB_KEY="th3s3cr3tk3y"
COLLECTION_NAME="test_langchain_collection"

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=2
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Container image for the RAG API server: installs the Python dependencies,
# copies the project into /app and starts api_server.py.

# Use the official slim Python 3.9 image from Docker Hub as the base
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy only the requirements file first so the dependency-install layer below
# can be reused from cache when just the application code changes
COPY requirements.txt .

# Install the required dependencies (--no-cache-dir keeps pip's download cache
# out of the image)
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Point the app at Qdrant and Ollama running on the Docker host rather than
# inside this container (replace with values from your .env file or other
# configs as needed).
# NOTE(review): host.docker.internal may need extra run-time configuration on
# Linux hosts - confirm for your environment.
ENV QDRANT_DB_URL='http://host.docker.internal:6333' \
OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
# NOTE(review): presumably matches LIT_SERVER_PORT used by api_server.py - keep
# the two in sync.
EXPOSE 8000

# Start the API server
CMD ["python", "api_server.py"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from simple_rag import SimpleRAG
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class SimpleRAGServingAPI(ls.LitAPI, ABC):
    """LitServe API that answers questions through a ``SimpleRAG`` pipeline.

    The request body is expected to carry the question under the ``"query"``
    key; the reply is a dict with the answer under ``"response"``.
    """

    def __init__(self):
        """Read the Qdrant settings from the environment.

        The ``SimpleRAG`` instance itself is created later, in ``setup``.
        """
        # Give the LitAPI base class the chance to initialise its own state.
        super().__init__()
        # Populated in setup(); None until then.
        self.simpleRAG: SimpleRAG = None
        self.file_path: str = "data/mlops.pdf"
        self.collection_name: str = os.environ.get("COLLECTION_NAME", 'test_collection')
        self.qdrant_url: str = os.environ.get("QDRANT_DB_URL", 'http://localhost:6333')
        self.qdrant_api_key: str = os.environ.get("QDRANT_DB_KEY", 'your_api_key_here')

    def setup(self, devices):
        """Build the ``SimpleRAG`` pipeline; ``devices`` is not used here."""
        self.simpleRAG = SimpleRAG(file_path=self.file_path, collection_name=self.collection_name,
                                   qdrant_url=self.qdrant_url, qdrant_api_key=self.qdrant_api_key)

    def decode_request(self, request, **kwargs):
        """Return the user's question taken from ``request["query"]``."""
        return request["query"]

    def predict(self, query: str):
        """Run ``query`` through the RAG pipeline and return its answer."""
        return self.simpleRAG.query(user_query=query)

    def encode_response(self, output, **kwargs):
        """Wrap the pipeline output as ``{'response': output}``."""
        return {'response': output}


if __name__ == '__main__':
    # Fall back to sane defaults when the variables are missing or empty, so a
    # missing .env file does not crash with int(None). Both values are passed
    # on as real integers rather than raw environment strings.
    workers_per_device = int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE') or 1)
    port = int(os.environ.get('LIT_SERVER_PORT') or 8000)

    api = SimpleRAGServingAPI()
    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
                          workers_per_device=workers_per_device)
    server.run(port=port)
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# Call the route that api_server.py registers and send the question under the
# "query" key, which is what the server's decode_request reads.
response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"query": "What is MLOps?"},
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Prompt template for the question-answering chain. It has two placeholders:
# `{input}` for the user's question and `{context}` for the retrieved text.
# NOTE(review): `{context}` is presumably filled in by the retrieval chain that
# consumes this template - confirm against the chain setup.
chat_prompt_template = """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.

Question: {input}
Context: {context}

Answer:
"""
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os

from simple_rag import SimpleRAG
from dotenv import load_dotenv, find_dotenv

# Load settings from the nearest .env file into the process environment.
load_dotenv(find_dotenv())

# Build the RAG pipeline; connection details come from the environment.
simpleRag = SimpleRAG(
    file_path='data/mlops.pdf',
    collection_name=os.getenv("COLLECTION_NAME"),
    qdrant_url=os.getenv("QDRANT_DB_URL"),
    qdrant_api_key=os.getenv("QDRANT_DB_KEY"),
)

# Uncomment the following line to insert data (only needed once) explicitly,
# else the data is inserted on the initialization
# simpleRag.insert_data_with_metadata()

# Interactive prompt: keep answering questions until the user asks to stop.
while True:
    user_query = input("Enter your query [type 'bye' to 'exit']: ")

    # Either keyword ends the session, regardless of letter case.
    if user_query.lower() in ("bye", "exit"):
        break

    response = simpleRag.query(user_query=user_query)
    print(f"Answer: {response}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# BasicRAG Project

This project implements a basic RAG-based question-answering system using LangChain, Ollama, and Qdrant.

## Prerequisites

- Python 3.9 or higher
- Ollama running locally (for LLM)
- Qdrant running locally (for vector storage)

## Project Structure
```tree
.
├── Dockerfile
├── __init__.py
├── api_server.py
├── client.py
├── custom_templates.py
├── data
│   └── mlops.pdf
├── main.py
├── readme.md
├── requirements.txt
└── simple_rag.py
```

## Installation

1. `pip install bootstrap-rag`

### Setting up Ollama and Qdrant
Method 1:
1. Navigate to `root_folder/setups`.
2. Run `docker-compose-dev.yml` with Docker Compose.
3. Run the `pull_model` script that matches your operating system.

Method 2:
1. Install and run Ollama:
- Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
- Make sure Ollama is running and accessible at `http://localhost:11434`.

2. Install and run Qdrant:
- Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
- Make sure Qdrant is running and accessible at `http://localhost:6333`.

## How to Run
1. Create a virtual environment (optional but recommended):
```
python -m venv venv
source venv/bin/activate # On Windows, use `venv\Scripts\activate`
```
2. Run `bootstraprag create <your_poc_project_name>`

3. Install the required dependencies:
```
pip install -r requirements.txt
```

### Usage

1. Prepare your MLOps PDF document and place it in the `data` directory.

2. Update the `.env` file with your specific configuration:
- Update `COLLECTION_NAME` if you want to use a different name for your Qdrant collection.
- Update `QDRANT_DB_URL` and `QDRANT_DB_KEY` if your Qdrant setup is different.
- To point at a different PDF document, change `file_path` in `main.py` and `api_server.py` (it is set in code, not in `.env`).

3. Run the script:
```
python main.py
```
or
```
python api_server.py
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
langchain==0.3.3
langchain-core==0.3.10
langchain-qdrant==0.1.4
langchain-ollama==0.2.0
langchain-community==0.3.2
qdrant-client==1.12.0
fastembed==0.3.6
PyMuPDF==1.24.11
python-dotenv==1.0.1
litserve==0.2.2
pydantic==2.9.0
arize-phoenix==5.2.2
arize-phoenix-otel==0.5.1
openinference-instrumentation-langchain==0.1.28
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import os

from langchain_community.document_loaders import PyMuPDFLoader
from langchain_ollama import OllamaLLM
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_qdrant import QdrantVectorStore
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
from typing import Any, List
from uuid import uuid4
from dotenv import load_dotenv, find_dotenv
from custom_templates import chat_prompt_template
from phoenix.otel import register
from openinference.instrumentation.langchain import LangChainInstrumentor
import phoenix as px

# Tracing setup. These statements run as soon as this module is imported.

# Start a Phoenix app and call view() on the object it returns.
px.launch_app().view()
# Register a tracer provider for the "simple-rag" project that sends to the
# endpoint below, and install it as the global tracer provider.
# NOTE(review): 127.0.0.1:4317 is presumably the collector of the Phoenix app
# launched above - confirm before pointing this at a remote host.
tracer_provider = register(
    project_name="simple-rag",
    endpoint="http://127.0.0.1:4317",  # change this to remote if needed
    set_global_tracer_provider=True

)
# Instrument LangChain with that tracer provider; the instrumentor's
# dependency check is skipped.
LangChainInstrumentor().instrument(tracer_provider=tracer_provider, skip_dep_check=True)


class SimpleRAG:
    """Question answering over a single PDF using Ollama, FastEmbed and Qdrant.

    Constructing an instance connects to Qdrant, creates and fills the
    collection when it does not exist yet, and builds the LangChain retrieval
    chain that ``query`` runs.
    """

    # Name of the named vector under which chunk embeddings are stored.
    _VECTOR_NAME = "content"

    def __init__(self, file_path: str, collection_name: str, qdrant_url: str, qdrant_api_key: str):
        """Create the clients, make sure the collection exists and build the chain.

        Args:
            file_path: Path of the PDF to index.
            collection_name: Qdrant collection to read from / write to.
            qdrant_url: URL of the Qdrant server.
            qdrant_api_key: API key used to connect to Qdrant.

        The LLM and embedding models are taken from the ``OLLAMA_LLM_MODEL``,
        ``OLLAMA_BASE_URL`` and ``EMBEDDING_MODEL`` environment variables.
        """
        load_dotenv(find_dotenv())
        self.file_path = file_path
        self.collection_name = collection_name
        self.qdrant_url = qdrant_url
        self.qdrant_api_key = qdrant_api_key

        self.model = OllamaLLM(model=os.environ.get("OLLAMA_LLM_MODEL"), base_url=os.environ.get("OLLAMA_BASE_URL"))

        # FastEmbedEmbeddings selects its model through `model_name`; passing
        # it as `model` left the configured EMBEDDING_MODEL unused. When the
        # variable is unset, the library default model is used.
        # NOTE: collections indexed before this fix were embedded with the
        # library default model and should be re-created.
        embedding_model = os.environ.get("EMBEDDING_MODEL")
        if embedding_model:
            self.embedding = FastEmbedEmbeddings(model_name=embedding_model)
        else:
            self.embedding = FastEmbedEmbeddings()

        self.client = QdrantClient(url=self.qdrant_url, api_key=self.qdrant_api_key)

        self.setup_qdrant()
        self.vector_store = self.setup_vector_store()
        self.retrieval_chain = self.setup_retrieval_chain()

    def setup_qdrant(self):
        """Create and populate the collection if it does not exist yet.

        Failures while creating or filling the collection are printed and not
        re-raised (best effort, as before).
        """
        if self.client.collection_exists(collection_name=self.collection_name):
            return

        try:
            # Size the collection from the embedder itself so that it matches
            # whichever embedding model is configured.
            vector_size = len(self.embedding.embed_query("dimension probe"))
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config={
                    self._VECTOR_NAME: VectorParams(size=vector_size, distance=Distance.COSINE)
                }
            )

            self.insert_data_with_metadata()
        except Exception as e:
            print(f"Exception: {str(e)}")

    def load_and_split_documents(self) -> List[Any]:
        """Load the PDF at ``file_path`` and return it split into chunks.

        Chunks are produced with a chunk size of 200 and an overlap of 20.
        """
        loader = PyMuPDFLoader(file_path=self.file_path)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
        return loader.load_and_split(text_splitter=text_splitter)

    def insert_data_with_metadata(self):
        """Embed every chunk of the PDF and upsert it into the collection.

        Each point gets a fresh UUID and a payload holding the chunk text plus
        its ``source`` and ``page`` metadata. Does nothing when the document
        yields no chunks.
        """
        documents = self.load_and_split_documents()
        if not documents:
            # Nothing to index; avoid sending an empty upsert.
            return

        # Embed all chunks in one call instead of one call per chunk.
        contents = [doc.page_content for doc in documents]
        vectors = self.embedding.embed_documents(contents)

        points = []
        for doc, content, vector in zip(documents, contents, vectors):
            point_id = str(uuid4())
            payload = {
                "page_content": content,
                "metadata": {
                    "id": point_id,
                    "page_content": content,
                    "source": doc.metadata['source'],
                    "page": doc.metadata['page'],
                }
            }
            points.append(PointStruct(id=point_id, vector={self._VECTOR_NAME: vector}, payload=payload))

        self.client.upsert(
            collection_name=self.collection_name,
            wait=True,
            points=points)

    def setup_vector_store(self) -> QdrantVectorStore:
        """Return a vector store bound to the collection's named vector."""
        return QdrantVectorStore(client=self.client, collection_name=self.collection_name, embedding=self.embedding,
                                 vector_name=self._VECTOR_NAME)

    def setup_retrieval_chain(self):
        """Build the retrieval chain from the prompt template, LLM and retriever."""
        prompt = ChatPromptTemplate.from_template(template=chat_prompt_template)
        retriever = self.vector_store.as_retriever()
        combine_docs_chain = create_stuff_documents_chain(self.model, prompt)
        return create_retrieval_chain(retriever, combine_docs_chain)

    def query(self, user_query: str) -> str:
        """Run ``user_query`` through the retrieval chain and return its answer."""
        result = self.retrieval_chain.invoke({"input": user_query})
        return result["answer"]