4 changes: 3 additions & 1 deletion bootstraprag/cli.py
@@ -43,7 +43,9 @@ def create(project_name, framework, template, observability):

elif framework == 'langchain':
template_choices = [
'simple-rag'
'simple-rag',
'rag-with-hyde',
'llm-as-judge'
]
elif framework == 'standalone-qdrant':
framework = 'qdrant'
10 changes: 10 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/.env
@@ -0,0 +1,10 @@
OLLAMA_BASE_URL="http://localhost:11434"
OLLAMA_LLM_MODEL="llama3.2:latest"
EMBEDDING_MODEL="snowflake/snowflake-arctic-embed-s"

QDRANT_DB_URL="http://localhost:6333/"
QDRANT_DB_KEY="th3s3cr3tk3y"
COLLECTION_NAME="crag_langchain_collection"

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=2
24 changes: 24 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/Dockerfile
@@ -0,0 +1,24 @@
# Use the official Python image from the Docker Hub
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file to the container
COPY requirements.txt .

# Install the required dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Set environment variables so the containerized app reaches services on the Docker host
# (the names match what the code reads; QDRANT_DB_URL, not QDRANT_URL)
ENV QDRANT_DB_URL='http://host.docker.internal:6333' \
    OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
EXPOSE 8000

# Command to run your application
CMD ["python", "api_server.py"]
Empty file.
51 changes: 51 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/api_server.py
@@ -0,0 +1,51 @@
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from llm_as_judge import LLMasJudge
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class LLMasJudgeAPI(ls.LitAPI, ABC):
def __init__(self):
self.llm_as_judge: LLMasJudge = None
self.FILE_PATH = 'data/mlops.pdf'
self.COLLECTION_NAME = os.environ.get('COLLECTION_NAME')
self.QDRANT_URL = os.environ.get('QDRANT_DB_URL')
self.QDRANT_API_KEY = os.environ.get('QDRANT_DB_KEY')
self.operation_name: str = ''

    def setup(self, device):
self.llm_as_judge = LLMasJudge(
file_path=self.FILE_PATH,
collection_name=self.COLLECTION_NAME,
qdrant_url=self.QDRANT_URL,
qdrant_api_key=self.QDRANT_API_KEY
)

def decode_request(self, request, **kwargs):
self.operation_name = request["operation"]
return request["query"]

def predict(self, query: str):
if self.operation_name == 'retrieval_grader':
return self.llm_as_judge.retrieval_grader(question=query)
elif self.operation_name == 'generate':
return self.llm_as_judge.generate(question=query)
elif self.operation_name == 'hallucination_grader':
generation = self.llm_as_judge.generate(question=query)
return self.llm_as_judge.hallucination_grader(question=query, generation=generation)
        elif self.operation_name == 'answer_grader':
            generation = self.llm_as_judge.generate(question=query)
            return self.llm_as_judge.answer_grader(question=query, generation=generation)
        else:
            raise ValueError(f"Unsupported operation: {self.operation_name}")

def encode_response(self, output, **kwargs):
return {'response': output}


if __name__ == '__main__':
api = LLMasJudgeAPI()
    server = ls.LitServer(lit_api=api, api_path='/v1/chat/completions',
                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT')))  # port must be an int, not the raw env string
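
For reference, a minimal client sketch that exercises each of the four operations `predict` dispatches on, assuming the server is running locally on the default port 8000 with the `/v1/chat/completions` api_path configured above:

```python
import requests

BASE_URL = "http://127.0.0.1:8000/v1/chat/completions"
OPERATIONS = ["retrieval_grader", "generate", "hallucination_grader", "answer_grader"]

# Each request carries the operation name plus the user query,
# matching what decode_request() expects.
for operation in OPERATIONS:
    response = requests.post(
        BASE_URL,
        json={"operation": operation, "query": "what are challenges of mlops?"},
    )
    print(f"{operation}: {response.json()['response']}")
```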
17 changes: 17 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/client.py
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

# The server expects an "operation" plus a "query" (see api_server.py's decode_request)
response = requests.post("http://127.0.0.1:8000/v1/chat/completions",
                         json={"operation": "generate", "query": "what are challenges of mlops?"})
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
44 changes: 44 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/custom_templates.py
@@ -0,0 +1,44 @@
retrieval_grader_template = """You are a grader assessing the relevance
of a retrieved document to a user question. If the document contains any information or keywords related to the user
question, grade it as relevant. This is a very lenient test - the document does not need to fully answer the question
to be considered relevant. Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
Also provide a brief explanation for your decision.

Return your response as a JSON with two keys: 'score' (either 'yes' or 'no') and 'explanation'.

Here is the retrieved document:
{document}

Here is the user question:
{question}
"""

hallucination_grading_template = """You are a grader assessing whether
an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' to indicate
whether the answer is grounded in / supported by the facts. Provide the binary score as a JSON with a
single key 'score' and no preamble or explanation.

Here are the facts:
{documents}

Here is the answer:
{generation}
"""

answer_generating_template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise:
Question: {question}
Context: {context}
Answer:
"""

answer_grading_template = """You are a grader assessing whether an
answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is
useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.

Here is the answer:
{generation}

Here is the question: {question}
"""
Binary file not shown.
118 changes: 118 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/llm_as_judge.py
@@ -0,0 +1,118 @@
import os

from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_ollama import OllamaLLM, ChatOllama
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient
from dotenv import load_dotenv, find_dotenv
from qdrant_client.http.models import VectorParams, Distance
from typing import List, Any
from custom_templates import (
retrieval_grader_template,
hallucination_grading_template,
answer_generating_template,
answer_grading_template
)


class LLMasJudge:
def __init__(self, file_path: str, collection_name: str, qdrant_url: str, qdrant_api_key: str):
load_dotenv(find_dotenv())
self.file_path = file_path
self.collection_name = collection_name
self.qdrant_url = qdrant_url
self.qdrant_api_key = qdrant_api_key

self.model = OllamaLLM(model=os.environ.get("OLLAMA_LLM_MODEL"), base_url=os.environ.get("OLLAMA_BASE_URL"))
self.embedding = FastEmbedEmbeddings(model=os.environ.get("EMBEDDING_MODEL"))
self.client = QdrantClient(url=self.qdrant_url, api_key=self.qdrant_api_key)
        # JSON-mode LLM for the grader chains (output parsed with JsonOutputParser)
        self.llm = ChatOllama(model=os.environ.get('OLLAMA_LLM_MODEL'),
                              base_url=os.environ.get('OLLAMA_BASE_URL'), format="json")
self.vector_store: QdrantVectorStore = None
self.documents = self.load_and_split_documents()
self.setup_qdrant()

def load_and_split_documents(self) -> List[Any]:
loader = PyMuPDFLoader(file_path=self.file_path)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
return loader.load_and_split(text_splitter=text_splitter)

def setup_qdrant(self):
if not self.client.collection_exists(collection_name=self.collection_name):
try:
self.client.create_collection(
collection_name=self.collection_name,
                    vectors_config={
                        # 384 is the output dimensionality of snowflake/snowflake-arctic-embed-s
                        "content": VectorParams(size=384, distance=Distance.COSINE)
                    }
)
self.load_data_to_qdrant()
except Exception as e:
print(f"Exception: {str(e)}")
else:
self.vector_store = QdrantVectorStore.from_existing_collection(
url=self.qdrant_url,
api_key=self.qdrant_api_key,
collection_name=self.collection_name,
embedding=self.embedding,
retrieval_mode=RetrievalMode.DENSE,
vector_name="content"
)

def load_data_to_qdrant(self):
vector_store: QdrantVectorStore = QdrantVectorStore(client=self.client, collection_name=self.collection_name,
embedding=self.embedding, vector_name="content",
retrieval_mode=RetrievalMode.DENSE)
vector_store.add_documents(
documents=self.documents
)
self.vector_store = vector_store

def retrieval_grader(self, question: str):
prompt = PromptTemplate(
template=retrieval_grader_template,
input_variables=["question", "document"],
)
retrieval_grader = prompt | self.llm | JsonOutputParser()
docs = self.vector_store.as_retriever().invoke(question)
        doc_txt = docs[0].page_content  # grade the top-ranked hit; indexing docs[1] raises IndexError with a single result
retrieval_grading_response = retrieval_grader.invoke({"question": question, "document": doc_txt})
return retrieval_grading_response

    def generate(self, question: str) -> str:
prompt = PromptTemplate(
template=answer_generating_template,
input_variables=["question", "context"]
)

        # Chain: use the plain-text OllamaLLM here; self.llm runs in JSON mode for the grader chains
        rag_chain = prompt | self.model | StrOutputParser()

# Run
docs = self.vector_store.as_retriever().invoke(question)
        generation = rag_chain.invoke({"context": docs, "question": question})
return generation

def hallucination_grader(self, question: str, generation):
prompt = PromptTemplate(
template=hallucination_grading_template,
input_variables=["generation", "documents"],
)
docs = self.vector_store.as_retriever().invoke(question)
hallucination_grader = prompt | self.llm | JsonOutputParser()
hallucination_grading_response = hallucination_grader.invoke({"documents": docs, "generation": generation})
return hallucination_grading_response

def answer_grader(self, question: str, generation: str):
prompt = PromptTemplate(
template=answer_grading_template,
input_variables=["generation", "question"]
)
answer_grader = prompt | self.llm | JsonOutputParser()
answer_grading_response = answer_grader.invoke({"question": question, "generation": generation})
return answer_grading_response
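
Taken together, the graders support a simple judge loop. A hypothetical sketch (not part of this PR) of how the two binary scores could gate a regeneration step, in the spirit of the CRAG direction mentioned in the readme:

```python
# Hypothetical helper: `judge` is an LLMasJudge instance; MAX_RETRIES is an assumed knob.
MAX_RETRIES = 2

def judged_answer(judge, question: str) -> str:
    generation = ""
    for _ in range(MAX_RETRIES + 1):
        generation = judge.generate(question=question)
        grounded = judge.hallucination_grader(question=question, generation=generation)
        useful = judge.answer_grader(question=question, generation=generation)
        # Accept only an answer that is both grounded in the retrieved facts and useful
        if grounded.get("score") == "yes" and useful.get("score") == "yes":
            return generation
    return generation  # fall back to the last attempt
```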
20 changes: 20 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/main.py
@@ -0,0 +1,20 @@
import os

from llm_as_judge import LLMasJudge
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())

llm_as_judge = LLMasJudge(
file_path='data/mlops.pdf',
collection_name=os.environ.get("COLLECTION_NAME"),
qdrant_url=os.environ.get("QDRANT_DB_URL"),
qdrant_api_key=os.environ.get("QDRANT_DB_KEY")
)

q = "what are challenges of mlops?"
llm_as_judge.retrieval_grader(question=q)
ans = llm_as_judge.generate(question=q)
print(ans)
llm_as_judge.hallucination_grader(question=q, generation=ans)
llm_as_judge.answer_grader(question=q, generation=ans)
56 changes: 56 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/readme.md
@@ -0,0 +1,56 @@
# LLM as Judge (scoped to CRAG)

This project implements the LLM-as-Judge concept to measure
- answer_hallucination
- generation_hallucination
- retrieval_hallucination

Eventually, this project will be converted into a CRAG project.

## Prerequisites

- Python 3.8 or higher
- Ollama running locally (for LLM)
- Qdrant running locally (for vector storage)

### Project structure
```
.
├── Dockerfile
├── __init__.py
├── api_server.py
├── custom_templates.py
├── data
│   └── mlops.pdf
├── llm_as_judge.py
├── main.py
├── readme.md
└── requirements.txt
```

## Installation

1. `pip install bootstrap-rag`

### Setting up Ollama and Qdrant
Method 1:
1. Navigate to `root_folder/setups`
2. Bring up the services defined in `docker-compose-dev.yml`
3. Run the `pull_model` script for your OS

Method 2:
1. Install and run Ollama:
- Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
- Make sure Ollama is running and accessible at `http://localhost:11434`.

2. Install and run Qdrant:
- Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
- Make sure Qdrant is running and accessible at `http://localhost:6333`.

## How to Run
1. Create a virtual environment (optional but recommended): `python -m venv venv`
2. Activate it: `source venv/bin/activate` (on Windows, use `venv\Scripts\activate`)
3. Run `bootstraprag create <your_poc_project_name>`
4. Install the required dependencies: `pip install -r requirements.txt`
5. Run `python main.py` or `python api_server.py`
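
Once `api_server.py` is up, a quick smoke test (assuming the default port and the request shape used in this template) looks like:

```python
import requests

resp = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",
    json={"operation": "generate", "query": "what are challenges of mlops?"},
)
print(resp.json())
```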


15 changes: 15 additions & 0 deletions bootstraprag/templates/langchain/llm_as_judge/requirements.txt
@@ -0,0 +1,15 @@
langchain-qdrant==0.1.4
langchain-community==0.3.3
langchain-ollama==0.2.0
tiktoken==0.8.0
langchainhub==0.1.21
langchain==0.3.4
langgraph==0.2.39
tavily-python==0.5.0
sentence-transformers==3.2.1
langchain-huggingface==0.1.0
qdrant-client==1.12.0
fastembed==0.3.6
PyMuPDF==1.24.11
python-dotenv==1.0.1
litserve==0.2.3