pavanjava · pavanjava · Oct 11, 2024 · Oct 10, 2024 · Oct 10, 2024 · Oct 11, 2024
diff --git a/bootstraprag/cli.py b/bootstraprag/cli.py
@@ -45,8 +45,12 @@ def create(project_name, framework, template, observability):
             'simple-rag'
         ]
     elif framework == 'standalone-qdrant':
+        framework = 'qdrant'
         template_choices = ['simple-search', 'multimodal-search', 'hybrid-search', 'hybrid-search-advanced',
                             'retrieval-quality']
+    elif framework == 'standalone-evaluations':
+        framework = 'evaluations'
+        template_choices = ['deep-evals', 'mlflow-evals', 'phoenix-evals', 'ragas-evals']
     # Use InquirerPy to select template with arrow keys
     template = inquirer.select(
         message="Which template would you like to use?",

diff --git a/bootstraprag/templates/evaluations/__init__.py b/bootstraprag/templates/evaluations/__init__.py
diff --git a/bootstraprag/templates/evaluations/deep_evals/__init__.py b/bootstraprag/templates/evaluations/deep_evals/__init__.py
diff --git a/bootstraprag/templates/evaluations/deep_evals/readme.md b/bootstraprag/templates/evaluations/deep_evals/readme.md
@@ -0,0 +1,2 @@
+## DeepEval Evaluations
+- Under development
diff --git a/bootstraprag/templates/evaluations/mlflow_evals/__init__.py b/bootstraprag/templates/evaluations/mlflow_evals/__init__.py
diff --git a/bootstraprag/templates/evaluations/mlflow_evals/readme.md b/bootstraprag/templates/evaluations/mlflow_evals/readme.md
@@ -0,0 +1,2 @@
+## MLflow Evaluations
+- Under development
diff --git a/bootstraprag/templates/evaluations/phoenix_evals/.env b/bootstraprag/templates/evaluations/phoenix_evals/.env
@@ -0,0 +1 @@
+OPENAI_API_KEY=sk-proj-
diff --git a/bootstraprag/templates/evaluations/phoenix_evals/__init__.py b/bootstraprag/templates/evaluations/phoenix_evals/__init__.py
diff --git a/bootstraprag/templates/evaluations/phoenix_evals/basic_evaluations.py b/bootstraprag/templates/evaluations/phoenix_evals/basic_evaluations.py
@@ -0,0 +1,42 @@
+from dotenv import load_dotenv, find_dotenv
+from phoenix.evals import HallucinationEvaluator, QAEvaluator, run_evals, OpenAIModel
+import pandas as pd
+
+# Load settings such as OPENAI_API_KEY from the nearest .env file into the environment
+load_dotenv(find_dotenv())
+
+# The first CSV column is an unnamed row index (0, 1, 2, ...); read it as the index so it
+# does not come back as a stray "Unnamed: 0" data column in the report
+df = pd.read_csv('input_data.csv', index_col=0)
+
+# Fail early with a clear message instead of relying on `assert`, which is stripped under `python -O`
+missing_columns = [column for column in ("reference", "query", "response") if column not in df.columns]
+if missing_columns:
+    raise ValueError(f"input_data.csv is missing required columns: {missing_columns}")
+
+# Model used as the judge by both evaluators; no API key is passed explicitly here
+eval_model = OpenAIModel(model="gpt-4o")
+
+# Define your evaluators
+hallucination_evaluator = HallucinationEvaluator(eval_model)
+qa_evaluator = QAEvaluator(eval_model)
+
+# We have to make some minor changes to our dataframe to use the column names expected by our evaluators
+# for `hallucination_evaluator` the input df needs to have columns 'output', 'input', 'context'
+# for `qa_evaluator` the input df needs to have columns 'output', 'input', 'reference'
+df["context"] = df["reference"]
+df = df.rename(columns={"query": "input", "response": "output"})
+
+# Run the evaluators, each evaluator will return a dataframe with evaluation results
+hallucination_eval_df, qa_eval_df = run_evals(
+    dataframe=df, evaluators=[hallucination_evaluator, qa_evaluator], provide_explanation=True
+)
+
+# Combine the inputs with each evaluator's label and explanation into a single report
+results_df = df.copy()
+results_df["hallucination_eval"] = hallucination_eval_df["label"]
+results_df["hallucination_explanation"] = hallucination_eval_df["explanation"]
+results_df["qa_eval"] = qa_eval_df["label"]
+results_df["qa_explanation"] = qa_eval_df["explanation"]
+
+results_df.to_csv('evaluation_report.csv')
diff --git a/bootstraprag/templates/evaluations/phoenix_evals/input_data.csv b/bootstraprag/templates/evaluations/phoenix_evals/input_data.csv
@@ -0,0 +1,11 @@
+,reference,query,response
+0,"The Eiffel Tower is located in Paris, France. It was constructed in 1889 as the entrance arch to the 1889 World's Fair.",Where is the Eiffel Tower located?,"The Eiffel Tower is located in Paris, France."
+1,"The Great Wall of China is over 13,000 miles long. It was built over many centuries by various Chinese dynasties to protect against nomadic invasions.",How long is the Great Wall of China?,"The Great Wall of China is approximately 13,171 miles (21,196 kilometers) long."
+2,"The Amazon rainforest is the largest tropical rainforest in the world. It covers much of northwestern Brazil and extends into Colombia, Peru and other South American countries.",What is the largest tropical rainforest?,The Amazon rainforest is the largest tropical rainforest in the world. It is home to the largest number of plant and animal species in the world.
+3,"Mount Everest is the highest mountain on Earth. It is located in the Mahalangur Himal sub-range of the Himalayas, straddling the border between Nepal and Tibet.",Which is the highest mountain on Earth?,"Mount Everest, standing at 29,029 feet (8,848 meters), is the highest mountain on Earth."
+4,"The Nile is the longest river in the world. It flows northward through northeastern Africa for approximately 6,650 km (4,132 miles) from its most distant source in Burundi to the Mediterranean Sea.",What is the longest river in the world?,"The Nile River, at 6,650 kilometers (4,132 miles), is the longest river in the world."
+5,"The Mona Lisa was painted by Leonardo da Vinci. It is considered an archetypal masterpiece of the Italian Renaissance and has been described as 'the best known, the most visited, the most written about, the most sung about, the most parodied work of art in the world'.",Who painted the Mona Lisa?,The Mona Lisa was painted by the Italian Renaissance artist Leonardo da Vinci.
+6,"The human body has 206 bones. These bones provide structure, protect organs, anchor muscles, and store calcium.",How many bones are in the human body?,The adult human body typically has 256 bones.
+7,Jupiter is the largest planet in our solar system. It is a gas giant with a mass more than two and a half times that of all the other planets in the solar system combined.,Which planet is the largest in our solar system?,Jupiter is the largest planet in our solar system.
+8,William Shakespeare wrote 'Romeo and Juliet'. It is a tragedy about two young star-crossed lovers whose deaths ultimately reconcile their feuding families.,Who wrote 'Romeo and Juliet'?,The play 'Romeo and Juliet' was written by William Shakespeare.
+9,"The first moon landing occurred in 1969. On July 20, 1969, American astronauts Neil Armstrong and Edwin 'Buzz' Aldrin became the first humans to land on the moon as part of the Apollo 11 mission.",When did the first moon landing occur?,"The first moon landing took place on July 20, 1969."
diff --git a/bootstraprag/templates/evaluations/phoenix_evals/readme.md b/bootstraprag/templates/evaluations/phoenix_evals/readme.md
@@ -0,0 +1,2 @@
+## Phoenix Evaluations
+- Under development
diff --git a/bootstraprag/templates/evaluations/phoenix_evals/requirements.txt b/bootstraprag/templates/evaluations/phoenix_evals/requirements.txt
@@ -0,0 +1,3 @@
+arize-phoenix==5.2.2
+python-dotenv==1.0.1
+pandas==2.2.3
diff --git a/bootstraprag/templates/evaluations/ragas_evals/__init__.py b/bootstraprag/templates/evaluations/ragas_evals/__init__.py
diff --git a/bootstraprag/templates/evaluations/ragas_evals/readme.md b/bootstraprag/templates/evaluations/ragas_evals/readme.md
@@ -0,0 +1,2 @@
+## RAGAS Evaluations
+- Under development
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/.env b/bootstraprag/templates/langchain/simple_rag_with_observability/.env
@@ -0,0 +1,10 @@
+OLLAMA_BASE_URL="http://localhost:11434"
+OLLAMA_LLM_MODEL="llama3.1"
+EMBEDDING_MODEL="snowflake/snowflake-arctic-embed-s"
+
+QDRANT_DB_URL="http://localhost:6333/"
+QDRANT_DB_KEY="th3s3cr3tk3y"
+COLLECTION_NAME="test_langchain_collection"
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=2
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/Dockerfile b/bootstraprag/templates/langchain/simple_rag_with_observability/Dockerfile
@@ -0,0 +1,24 @@
+# Use the official Python image from the Docker Hub
+FROM python:3.9-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file to the container
+COPY requirements.txt .
+
+# Install the required dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the current directory contents into the container at /app
+COPY . .
+
+# Set environment variables (you can replace these with values from your .env file or other configs)
+ENV QDRANT_DB_URL='http://host.docker.internal:6333' \
+    OLLAMA_BASE_URL='http://host.docker.internal:11434'
+
+# Expose port 8000 for external access
+EXPOSE 8000
+
+# Command to run your application
+CMD ["python", "api_server.py"]
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/__init__.py b/bootstraprag/templates/langchain/simple_rag_with_observability/__init__.py
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/api_server.py b/bootstraprag/templates/langchain/simple_rag_with_observability/api_server.py
@@ -0,0 +1,48 @@
+from abc import ABC
+from dotenv import load_dotenv, find_dotenv
+from simple_rag import SimpleRAG
+import litserve as ls
+import os
+
+_ = load_dotenv(find_dotenv())
+
+
+class SimpleRAGServingAPI(ls.LitAPI, ABC):
+    """LitServe API that answers questions by delegating to a `SimpleRAG` instance."""
+
+    def __init__(self):
+        # Created in setup() rather than here
+        self.simpleRAG: SimpleRAG = None
+        self.file_path: str = "data/mlops.pdf"
+        # Connection settings come from the environment, with local-development fallbacks
+        self.collection_name: str = os.environ.get("COLLECTION_NAME", 'test_collection')
+        self.qdrant_url: str = os.environ.get("QDRANT_DB_URL", 'http://localhost:6333')
+        self.qdrant_api_key: str = os.environ.get("QDRANT_DB_KEY", 'your_api_key_here')
+
+    def setup(self, devices):
+        """Build the `SimpleRAG` instance from the settings collected in `__init__`."""
+        self.simpleRAG = SimpleRAG(file_path=self.file_path, collection_name=self.collection_name,
+                                   qdrant_url=self.qdrant_url, qdrant_api_key=self.qdrant_api_key)
+
+    def decode_request(self, request, **kwargs):
+        """Return the value stored under the `query` key of the request."""
+        return request["query"]
+
+    def predict(self, query: str):
+        """Run the query through `SimpleRAG.query` and return its result."""
+        return self.simpleRAG.query(user_query=query)
+
+    def encode_response(self, output, **kwargs):
+        """Wrap the prediction in a dict under the `response` key."""
+        return {'response': output}
+
+
+if __name__ == '__main__':
+    api = SimpleRAGServingAPI()
+    # Fall back to the values shipped in .env (2 workers, port 8000) so that a missing
+    # variable does not crash on int(None); the port is converted to int as well
+    workers_per_device = int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE', '2'))
+    port = int(os.environ.get('LIT_SERVER_PORT', '8000'))
+    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
+                          workers_per_device=workers_per_device)
+    server.run(port=port)
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/client.py b/bootstraprag/templates/langchain/simple_rag_with_observability/client.py
@@ -0,0 +1,18 @@
+# Copyright The Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import requests
+
+# Must match api_server.py: the route is its `api_path` and the server reads the "query" key
+response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion", json={"query": "What is MLOps?"})
+print(f"Status: {response.status_code}\nResponse:\n {response.text}")
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/custom_templates.py b/bootstraprag/templates/langchain/simple_rag_with_observability/custom_templates.py
@@ -0,0 +1,11 @@
+# Question-answering prompt text with two placeholders: {input} (the question) and {context} (the retrieved context)
+chat_prompt_template = """
+        You are an assistant for question-answering tasks. 
+        Use the following pieces of retrieved context to answer the question. 
+        If you don't know the answer, just say that you don't know. 
+
+        Question: {input}
+        Context: {context}
+
+        Answer:
+        """
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/data/mlops.pdf b/bootstraprag/templates/langchain/simple_rag_with_observability/data/mlops.pdf
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/main.py b/bootstraprag/templates/langchain/simple_rag_with_observability/main.py
@@ -0,0 +1,34 @@
+import os
+
+from simple_rag import SimpleRAG
+from dotenv import load_dotenv, find_dotenv
+
+load_dotenv(find_dotenv())
+
+# Same environment fallbacks as api_server.py, so both entry points behave alike when .env is missing
+simpleRag = SimpleRAG(
+    file_path='data/mlops.pdf',
+    collection_name=os.environ.get("COLLECTION_NAME", 'test_collection'),
+    qdrant_url=os.environ.get("QDRANT_DB_URL", 'http://localhost:6333'),
+    qdrant_api_key=os.environ.get("QDRANT_DB_KEY", 'your_api_key_here')
+)
+
+# Uncomment the following line to insert data (only needed once) explicitly,
+# else the data is inserted on the initialization
+# simpleRag.insert_data_with_metadata()
+
+# Start a loop to continually get input from the user
+while True:
+    try:
+        # Get a query from the user
+        user_query = input("Enter your query [type 'bye' to 'exit']: ")
+    except (EOFError, KeyboardInterrupt):
+        # Ctrl-D / Ctrl-C or a closed stdin ends the session instead of printing a traceback
+        break
+
+    # Check if the user wants to terminate the loop
+    if user_query.lower() in ("bye", "exit"):
+        break
+
+    response = simpleRag.query(user_query=user_query)
+    print(f"Answer: {response}")
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/readme.md b/bootstraprag/templates/langchain/simple_rag_with_observability/readme.md
@@ -0,0 +1,75 @@
+# BasicRAG Project
+
+This project implements a basic RAG based Question-Answering system using LangChain, Ollama, and Qdrant.
+
+## Prerequisites
+
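+- Python 3.9 or higher (the bundled Dockerfile uses `python:3.9-slim`, and the pinned LangChain 0.3.x packages no longer support 3.8)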
+- Ollama running locally (for LLM)
+- Qdrant running locally (for vector storage)
+
+## Project Structure
+```tree
+.
+├── Dockerfile
+├── __init__.py
+├── api_server.py
+├── client.py
+├── custom_templates.py
+├── data
+│   └── mlops.pdf
+├── main.py
+├── readme.md
+├── requirements.txt
+└── simple_rag.py
+```
+
+## Installation
+
+1. `pip install bootstrap-rag`
+
+### Setting up Ollama and Qdrant
+Method 1:
+1. navigate to root_folder/setups
+2. run the docker-compose-dev.yml
+3. run the pull_model as per the underlying OS
+
+Method 2:
+1. Install and run Ollama:
+   - Follow the instructions at [Ollama's official website](https://ollama.ai/) to install Ollama.
+   - Make sure Ollama is running and accessible at `http://localhost:11434`.
+
+2. Install and run Qdrant:
+   - Follow the instructions at [Qdrant's official website](https://qdrant.tech/documentation/quick-start/) to install Qdrant.
+   - Make sure Qdrant is running and accessible at `http://localhost:6333`.
+
+## How to Run
+1. Create a virtual environment (optional but recommended):
+   ```
+   python -m venv venv
+   source venv/bin/activate  # On Windows, use `venv\Scripts\activate`
+   ```
+2. run `bootstraprag create <your_poc_project_name>`
+
+3. Install the required dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+
+### Usage
+
+1. Prepare your MLOps PDF document and place it in the `data` directory.
+
+2. Update the configuration to match your setup:
+    - Update the `file_path` in `main.py` / `api_server.py` to point to your PDF document (it is hard-coded there, not read from `.env`).
+    - Update `COLLECTION_NAME` in the `.env` file if you want to use a different name for your Qdrant collection.
+    - Update `QDRANT_DB_URL` and `QDRANT_DB_KEY` in the `.env` file if your Qdrant setup is different.
+
+3. Run the script:
+   ```
+   python main.py
+   ```
+   or
+   ```
+   python api_server.py
+   ```
diff --git a/bootstraprag/templates/langchain/simple_rag_with_observability/requirements.txt b/bootstraprag/templates/langchain/simple_rag_with_observability/requirements.txt
@@ -0,0 +1,14 @@
+langchain==0.3.3
+langchain-core==0.3.10
+langchain-qdrant==0.1.4
+langchain-ollama==0.2.0
+langchain-community==0.3.2
+qdrant-client==1.12.0
+fastembed==0.3.6
+PyMuPDF==1.24.11
+python-dotenv==1.0.1
+litserve==0.2.2
+pydantic==2.9.0
+arize-phoenix==5.2.2
+arize-phoenix-otel==0.5.1
+openinference-instrumentation-langchain==0.1.28