4 changes: 3 additions & 1 deletion bootstraprag/cli.py
@@ -43,7 +43,9 @@ def create(project_name, framework, template, observability):

elif framework == 'langchain':
template_choices = [
'simple-rag'
'simple-rag',
'rag-with-hyde',
'llm-as-judge'
]
elif framework == 'standalone-qdrant':
framework = 'qdrant'
10 changes: 10 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/.env
@@ -0,0 +1,10 @@
OLLAMA_BASE_URL="http://localhost:11434"
OLLAMA_LLM_MODEL="llama3.2:latest"
EMBEDDING_MODEL="snowflake/snowflake-arctic-embed-s"

QDRANT_DB_URL="http://localhost:6333/"
QDRANT_DB_KEY="th3s3cr3tk3y"
COLLECTION_NAME="crag_langchain_collection"

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=2
24 changes: 24 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/Dockerfile
@@ -0,0 +1,24 @@
# Use the official Python image from the Docker Hub
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file to the container
COPY requirements.txt .

# Install the required dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Set environment variables so the containerized app reaches services on the Docker host
# (the names match what the code reads; QDRANT_DB_URL, not QDRANT_URL)
ENV QDRANT_DB_URL='http://host.docker.internal:6333' \
    OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
EXPOSE 8000

# Command to run your application
CMD ["python", "api_server.py"]
Empty file.
51 changes: 51 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/api_server.py
@@ -0,0 +1,51 @@
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from llm_as_judge import LLMasJudge
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class LLMasJudgeAPI(ls.LitAPI, ABC):
def __init__(self):
self.llm_as_judge: LLMasJudge = None
self.FILE_PATH = 'data/mlops.pdf'
self.COLLECTION_NAME = os.environ.get('COLLECTION_NAME')
self.QDRANT_URL = os.environ.get('QDRANT_DB_URL')
self.QDRANT_API_KEY = os.environ.get('QDRANT_DB_KEY')
self.operation_name: str = ''

    def setup(self, device):
self.llm_as_judge = LLMasJudge(
file_path=self.FILE_PATH,
collection_name=self.COLLECTION_NAME,
qdrant_url=self.QDRANT_URL,
qdrant_api_key=self.QDRANT_API_KEY
)

def decode_request(self, request, **kwargs):
self.operation_name = request["operation"]
return request["query"]

def predict(self, query: str):
if self.operation_name == 'retrieval_grader':
return self.llm_as_judge.retrieval_grader(question=query)
elif self.operation_name == 'generate':
return self.llm_as_judge.generate(question=query)
elif self.operation_name == 'hallucination_grader':
generation = self.llm_as_judge.generate(question=query)
return self.llm_as_judge.hallucination_grader(question=query, generation=generation)
        elif self.operation_name == 'answer_grader':
            generation = self.llm_as_judge.generate(question=query)
            return self.llm_as_judge.answer_grader(question=query, generation=generation)
        else:
            raise ValueError(f"Unsupported operation: {self.operation_name}")

def encode_response(self, output, **kwargs):
return {'response': output}


if __name__ == '__main__':
api = LLMasJudgeAPI()
    server = ls.LitServer(lit_api=api, api_path='/v1/chat/completions',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT')))  # port must be an int, not the raw env string
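
For reference, a minimal client sketch that exercises each of the four operations `predict` dispatches on, assuming the server is running locally on the default port 8000 with the `/v1/chat/completions` api_path configured above:

```python
import requests

BASE_URL = "http://127.0.0.1:8000/v1/chat/completions"
OPERATIONS = ["retrieval_grader", "generate", "hallucination_grader", "answer_grader"]

# Each request carries the operation name plus the user query,
# matching what decode_request() expects.
for operation in OPERATIONS:
    response = requests.post(
        BASE_URL,
        json={"operation": operation, "query": "what are challenges of mlops?"},
    )
    print(f"{operation}: {response.json()['response']}")
```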
17 changes: 17 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/client.py
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# The server expects an "operation" plus a "query" (see api_server.py's decode_request)
response = requests.post("http://127.0.0.1:8000/v1/chat/completions",
                         json={"operation": "generate", "query": "what are challenges of mlops?"})
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
44 changes: 44 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/custom_templates.py
@@ -0,0 +1,44 @@
retrieval_grader_template = """You are a grader assessing the relevance
of a retrieved document to a user question. If the document contains any information or keywords related to the user
question, grade it as relevant. This is a very lenient test - the document does not need to fully answer the question
to be considered relevant. Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
Also provide a brief explanation for your decision.

Return your response as a JSON with two keys: 'score' (either 'yes' or 'no') and 'explanation'.

Here is the retrieved document:
{document}

Here is the user question:
{question}
"""

hallucination_grading_template = """You are a grader assessing whether
an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' to indicate
whether the answer is grounded in / supported by the facts. Provide the binary score as a JSON with a
single key 'score' and no preamble or explanation.

Here are the facts:
{documents}

Here is the answer:
{generation}
"""

answer_generating_template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise:
Question: {question}
Context: {context}
Answer:
"""

answer_grading_template = """You are a grader assessing whether an
answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is
useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.

Here is the answer:
{generation}

Here is the question: {question}
"""
Binary file not shown.
118 changes: 118 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/llm_as_judge.py
@@ -0,0 +1,118 @@
import os

from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaLLM, ChatOllama
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from dotenv import load_dotenv, find_dotenv
from qdrant_client.http.models import VectorParams, Distance
from typing import List, Any
from custom_templates import (
retrieval_grader_template,
hallucination_grading_template,
answer_generating_template,
answer_grading_template
)


class LLMasJudge:
def __init__(self, file_path: str, collection_name: str, qdrant_url: str, qdrant_api_key: str):
load_dotenv(find_dotenv())
self.file_path = file_path
self.collection_name = collection_name
self.qdrant_url = qdrant_url
self.qdrant_api_key = qdrant_api_key

self.model = OllamaLLM(model=os.environ.get("OLLAMA_LLM_MODEL"), base_url=os.environ.get("OLLAMA_BASE_URL"))
self.embedding = FastEmbedEmbeddings(model=os.environ.get("EMBEDDING_MODEL"))
self.client = QdrantClient(url=self.qdrant_url, api_key=self.qdrant_api_key)
        # JSON-mode LLM for the grader chains (output parsed with JsonOutputParser)
        self.llm = ChatOllama(model=os.environ.get('OLLAMA_LLM_MODEL'),
                              base_url=os.environ.get('OLLAMA_BASE_URL'), format="json")
self.vector_store: QdrantVectorStore = None
self.documents = self.load_and_split_documents()
self.setup_qdrant()

def load_and_split_documents(self) -> List[Any]:
loader = PyMuPDFLoader(file_path=self.file_path)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
return loader.load_and_split(text_splitter=text_splitter)

def setup_qdrant(self):
if not self.client.collection_exists(collection_name=self.collection_name):
try:
self.client.create_collection(
collection_name=self.collection_name,
                    vectors_config={
                        # 384 is the output dimensionality of snowflake/snowflake-arctic-embed-s
                        "content": VectorParams(size=384, distance=Distance.COSINE)
                    }
)
self.load_data_to_qdrant()
except Exception as e:
print(f"Exception: {str(e)}")
else:
self.vector_store = QdrantVectorStore.from_existing_collection(
url=self.qdrant_url,
api_key=self.qdrant_api_key,
collection_name=self.collection_name,
embedding=self.embedding,
retrieval_mode=RetrievalMode.DENSE,
vector_name="content"
)

def load_data_to_qdrant(self):
vector_store: QdrantVectorStore = QdrantVectorStore(client=self.client, collection_name=self.collection_name,
embedding=self.embedding, vector_name="content",
retrieval_mode=RetrievalMode.DENSE)
vector_store.add_documents(
documents=self.documents
)
self.vector_store = vector_store

def retrieval_grader(self, question: str):
prompt = PromptTemplate(
template=retrieval_grader_template,
input_variables=["question", "document"],
)
retrieval_grader = prompt | self.llm | JsonOutputParser()
docs = self.vector_store.as_retriever().invoke(question)
        doc_txt = docs[0].page_content  # grade the top-ranked hit; indexing docs[1] raises IndexError with a single result
retrieval_grading_response = retrieval_grader.invoke({"question": question, "document": doc_txt})
return retrieval_grading_response

    def generate(self, question: str) -> str:
prompt = PromptTemplate(
template=answer_generating_template,
input_variables=["question", "context"]
)

        # Chain: use the plain-text OllamaLLM here; self.llm runs in JSON mode for the grader chains
        rag_chain = prompt | self.model | StrOutputParser()

# Run
docs = self.vector_store.as_retriever().invoke(question)
        generation = rag_chain.invoke({"context": docs, "question": question})
return generation

def hallucination_grader(self, question: str, generation):
prompt = PromptTemplate(
template=hallucination_grading_template,
input_variables=["generation", "documents"],
)
docs = self.vector_store.as_retriever().invoke(question)
hallucination_grader = prompt | self.llm | JsonOutputParser()
hallucination_grading_response = hallucination_grader.invoke({"documents": docs, "generation": generation})
return hallucination_grading_response

def answer_grader(self, question: str, generation: str):
prompt = PromptTemplate(
template=answer_grading_template,
input_variables=["generation", "question"]
)
answer_grader = prompt | self.llm | JsonOutputParser()
answer_grading_response = answer_grader.invoke({"question": question, "generation": generation})
return answer_grading_response
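
Taken together, the graders support a simple judge loop. A hypothetical sketch (not part of this PR) of how the two binary scores could gate a regeneration step, in the spirit of the CRAG direction mentioned in the readme:

```python
# Hypothetical helper: `judge` is an LLMasJudge instance; MAX_RETRIES is an assumed knob.
MAX_RETRIES = 2

def judged_answer(judge, question: str) -> str:
    generation = ""
    for _ in range(MAX_RETRIES + 1):
        generation = judge.generate(question=question)
        grounded = judge.hallucination_grader(question=question, generation=generation)
        useful = judge.answer_grader(question=question, generation=generation)
        # Accept only an answer that is both grounded in the retrieved facts and useful
        if grounded.get("score") == "yes" and useful.get("score") == "yes":
            return generation
    return generation  # fall back to the last attempt
```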
20 changes: 20 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/main.py
@@ -0,0 +1,20 @@
import os

from llm_as_judge import LLMasJudge
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

llm_as_judge = LLMasJudge(
file_path='data/mlops.pdf',
collection_name=os.environ.get("COLLECTION_NAME"),
qdrant_url=os.environ.get("QDRANT_DB_URL"),
qdrant_api_key=os.environ.get("QDRANT_DB_KEY")
)

q = "what are challenges of mlops?"
llm_as_judge.retrieval_grader(question=q)
ans = llm_as_judge.generate(question=q)
print(ans)
llm_as_judge.hallucination_grader(question=q, generation=ans)
llm_as_judge.answer_grader(question=q, generation=ans)
56 changes: 56 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/readme.md
@@ -0,0 +1,56 @@
# LLM as Judge (scoped to CRAG)

This project implements the LLM-as-Judge concept to measure
- answer_hallucination
- generation_hallucination
- retrieval_hallucination

Eventually, this project will be converted into a CRAG project.

## Prerequisites

- Python 3.8 or higher
- Ollama running locally (for LLM)
- Qdrant running locally (for vector storage)

### Project structure
```
.
├── Dockerfile
├── __init__.py
├── api_server.py
├── custom_templates.py
├── data
│   └── mlops.pdf
├── llm_as_judge.py
├── main.py
├── readme.md
└── requirements.txt
```

## Installation

1. `pip install bootstrap-rag`

### Setting up Ollama and Qdrant
Method 1:
1. Navigate to `root_folder/setups`
2. Bring up the services defined in `docker-compose-dev.yml`
3. Run the `pull_model` script for your OS

Method 2:
1. Install and run Ollama:
- Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
- Make sure Ollama is running and accessible at `http://localhost:11434`.

2. Install and run Qdrant:
- Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
- Make sure Qdrant is running and accessible at `http://localhost:6333`.

## How to Run
1. Create a virtual environment (optional but recommended): `python -m venv venv`
2. Activate it: `source venv/bin/activate` (on Windows, use `venv\Scripts\activate`)
3. Run `bootstraprag create <your_poc_project_name>`
4. Install the required dependencies: `pip install -r requirements.txt`
5. Run `python main.py` or `python api_server.py`
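
Once `api_server.py` is up, a quick smoke test (assuming the default port and the request shape used in this template) looks like:

```python
import requests

resp = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",
    json={"operation": "generate", "query": "what are challenges of mlops?"},
)
print(resp.json())
```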


15 changes: 15 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/requirements.txt
@@ -0,0 +1,15 @@
langchain-qdrant==0.1.4
langchain-community==0.3.3
langchain-ollama==0.2.0
tiktoken==0.8.0
langchainhub==0.1.21
langchain==0.3.4
langgraph==0.2.39
tavily-python==0.5.0
sentence-transformers==3.2.1
langchain-huggingface==0.1.0
qdrant-client==1.12.0
fastembed==0.3.6
PyMuPDF==1.24.11
python-dotenv==1.0.1
litserve==0.2.3