1 change: 1 addition & 0 deletions bootstraprag/cli.py
@@ -34,6 +34,7 @@ def create(project_name, framework, template, observability):
'rag-with-hyde',
'rag-with-flare',
'rag-with-self-correction',
'rag-with-controllable-agents',
'llama-deploy-with-simplemq',
'llama-deploy-with-rabbitmq',
'llama-deploy-with-kafka'
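For context, scaffolding a project from the new template would presumably look something like the command below; the exact option names are an assumption inferred from the `create` signature above, not confirmed by this diff.

# assumed invocation; option names inferred from create(project_name, framework, template, observability)
bootstraprag create my_rag_project --template rag-with-controllable-agents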
@@ -0,0 +1,29 @@
DB_URL='http://localhost:6333'
DB_API_KEY='th3s3cr3tk3y'
COLLECTION_NAME='CONTROLLABLE_AGENTS_COLLECTION'

OPENAI_API_KEY='sk-'
OPENAI_EMBED_MODEL='text-embedding-3-small'
# both of these models can be the same.
OPENAI_AGENT_MODEL='gpt-4-turbo-2024-04-09'
OPENAI_QUERY_MODEL='gpt-4o'

# use these if you prefer to experiment with local models.
OLLAMA_BASE_URL='http://localhost:11434'
OLLAMA_LLM_MODEL='llama3.1'
OLLAMA_EMBED_MODEL='nomic-embed-text:latest'

# logging level can be controlled via these env values
CRITICAL = 50
FATAL = 50
ERROR = 40
WARNING = 30
WARN = 30
INFO = 20
DEBUG = 10
NOTSET = 0

LIT_SERVER_PORT=8000
LIT_SERVER_WORKERS_PER_DEVICE=4

IS_EVALUATION_NEEDED=true
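The numeric entries above mirror Python's standard logging levels, and the evaluator module later in this diff reads one of them (`INFO`) to configure its logger. A minimal sketch of that pattern, assuming the `.env` file has been loaded:

import logging
import os

from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())
# 'INFO' resolves to 20 via the .env above; fall back to logging.INFO if unset
logging.basicConfig(level=int(os.environ.get('INFO', logging.INFO)))
logging.getLogger(__name__).info("logger configured from env")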
@@ -0,0 +1,24 @@
# Use the official Python image from the Docker Hub
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file to the container
COPY requirements.txt .

# Install the required dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . .

# Set environment variables (you can replace these with values from your .env file or other configs)
ENV DB_URL='http://host.docker.internal:6333' \
OLLAMA_BASE_URL='http://host.docker.internal:11434'

# Expose port 8000 for external access
EXPOSE 8000

# Command to run your application
CMD ["python", "api_server.py"]
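A typical build-and-run sequence for this image might look like the following; the image tag is illustrative, and `--env-file .env` is one way to supply the settings the Dockerfile does not bake in:

# build the image (the tag name is arbitrary)
docker build -t controllable-agents .
# run it, publishing port 8000 and passing the env file
docker run -p 8000:8000 --env-file .env controllable-agents

Note that `host.docker.internal` resolves to the host machine on Docker Desktop (macOS/Windows); on Linux you may need to add `--add-host=host.docker.internal:host-gateway` to the run command.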
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0})
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
@@ -0,0 +1,114 @@
import os
from typing import Optional
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    Settings,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.vector_stores.qdrant import QdrantVectorStore
from rag_evaluator import RAGEvaluator
from dotenv import load_dotenv, find_dotenv
import qdrant_client
import logging


class ControllableAgentsWithHumanInLoop:
    def __init__(self, input_dir: str, show_progress: bool = True, required_exts: Optional[list[str]] = None,
                 similarity_top_k: int = 3, chunk_size: int = 512, chunk_overlap: int = 200, max_iterations: int = 20):
        # avoid a shared mutable default argument for the extension list
        required_exts = required_exts if required_exts is not None else ['.pdf', '.txt']
        load_dotenv(find_dotenv())

        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        llm = OpenAI(model=os.environ.get('OPENAI_QUERY_MODEL'))

        Settings.chunk_size = chunk_size
        Settings.chunk_overlap = chunk_overlap
        Settings.llm = llm
        # honour the embedding model configured in .env (previously left at the library default)
        Settings.embed_model = OpenAIEmbedding(model=os.environ.get('OPENAI_EMBED_MODEL'))

self.rag_evaluator = RAGEvaluator()

self.similarity_top_k = similarity_top_k

self.text_parser = SentenceSplitter(chunk_size=Settings.chunk_size, chunk_overlap=Settings.chunk_overlap)

self.client = qdrant_client.QdrantClient(url=os.environ['DB_URL'], api_key=os.environ['DB_API_KEY'])
self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME'])
self.vector_index = None

self.mlops_data = SimpleDirectoryReader(input_dir=input_dir, required_exts=required_exts).load_data(
show_progress=show_progress)

self.mlops_tool = self.get_tool("mlops_tool", "MLOps Tool", documents=self.mlops_data)

self.query_engine_tools = [self.mlops_tool]

agent_llm = OpenAI(model=os.environ.get('OPENAI_AGENT_MODEL'))
self.agent = ReActAgent.from_tools(
self.query_engine_tools, llm=agent_llm, verbose=True, max_iterations=max_iterations
)

def get_tool(self, name, full_name, documents=None):
self.logger.info("initializing the storage context")
storage_context = StorageContext.from_defaults(vector_store=self.vector_store)

if not self.client.collection_exists(collection_name=os.environ.get('COLLECTION_NAME')):
self.logger.info("indexing the nodes in VectorStoreIndex")
self.vector_index = VectorStoreIndex.from_documents(
documents=documents,
storage_context=storage_context,
transformations=Settings.transformations,
)
else:
self.vector_index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store)

query_engine = self.vector_index.as_query_engine(similarity_top_k=self.similarity_top_k, llm=Settings.llm)
query_engine_tool = QueryEngineTool(
query_engine=query_engine,
metadata=ToolMetadata(
name=name,
description=(
"Provides information about mlops and its details"
f" {full_name}"
),
),
)
return query_engine_tool

def chat_repl(self, user_query: str, exit_when_done: bool = True):
task_message = user_query

task = self.agent.create_task(task_message)

response = None
step_output = None
message = None
while message != "exit":
if message is None or message == "":
step_output = self.agent.run_step(task.task_id)
else:
step_output = self.agent.run_step(task.task_id, input=message)
if exit_when_done and step_output.is_last:
print(">> Task marked as finished by the agent, executing task execution.")
break

message = input(">> Add feedback during step? (press enter/leave blank to continue, "
"and type 'exit' to stop): ")
if message == "exit":
break

if step_output is None:
print(">> You haven't run the agent. Task is discarded.")
elif not step_output.is_last:
print(">> The agent hasn't finished yet. Task is discarded.")
else:
response = self.agent.finalize_response(task.task_id)
print(f"Agent: {str(response)}")

        # only evaluate when the agent actually produced a final response
        if response is not None and os.environ.get('IS_EVALUATION_NEEDED') == 'true':
            self.rag_evaluator.evaluate(user_query=user_query, response_obj=response)
return response
Binary file not shown.
@@ -0,0 +1,17 @@
# driver code
from controllable_agents_with_human_in_the_loop import ControllableAgentsWithHumanInLoop


controllable_agent = ControllableAgentsWithHumanInLoop(input_dir='data', show_progress=True)

# Start a loop to continually get input from the user
while True:
    # Get a query from the user
    user_query = input("Enter your query (type 'bye' or 'exit' to quit): ")

    # Check if the user wants to terminate the loop
    if user_query.lower() in ("bye", "exit"):
        break

response = controllable_agent.chat_repl(user_query=user_query)
print(response)
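An illustrative console session (the agent's intermediate ReAct output will vary) could look like this:

Enter your query (type 'bye' or 'exit' to quit): What is MLOps?
Thought: I need to use the mlops_tool to answer this question. ...
>> Add feedback during step? (press Enter / leave blank to continue, or type 'exit' to stop):
>> Task marked as finished by the agent, ending task execution.
Agent: MLOps is a set of practices for deploying and maintaining machine learning models in production. ...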
@@ -0,0 +1,34 @@
from deepeval.integrations.llama_index import (
DeepEvalFaithfulnessEvaluator,
DeepEvalAnswerRelevancyEvaluator,
DeepEvalContextualRelevancyEvaluator
)
from dotenv import load_dotenv, find_dotenv
from typing import Any
import os
import logging

_ = load_dotenv(find_dotenv())
logging.basicConfig(level=int(os.environ.get('INFO', logging.INFO)))  # numeric level from .env; default to INFO
logger = logging.getLogger(__name__)


class RAGEvaluator:
def __init__(self):
self.faithfulness_evaluator = DeepEvalFaithfulnessEvaluator()
self.answer_relevancy_evaluator = DeepEvalAnswerRelevancyEvaluator()
self.context_relevancy_evaluator = DeepEvalContextualRelevancyEvaluator()

def evaluate(self, user_query: str, response_obj: Any):
logger.info(f"calling evaluation, user_query: {user_query}, response_obj: {response_obj}")
retrieval_context = [node.get_content() for node in response_obj.source_nodes]
actual_output = response_obj.response
faithfulness_evaluation_response = self.faithfulness_evaluator.evaluate(query=user_query, response=actual_output,
contexts=retrieval_context)
answer_relevancy_response = self.answer_relevancy_evaluator.evaluate(query=user_query, response=actual_output,
contexts=retrieval_context)
context_relevancy_response = self.context_relevancy_evaluator.evaluate(query=user_query, response=actual_output,
contexts=retrieval_context)
logger.info(f"faithfulness_evaluation_response: {faithfulness_evaluation_response.score}")
logger.info(f"answer_relevancy_response: {answer_relevancy_response.score}")
logger.info(f"context_relevancy_response: {context_relevancy_response.score}")
@@ -0,0 +1,10 @@
## Instructions to run the code

- Navigate to the root of the project and run the command below
- `pip install -r requirements.txt`
- Open the `.env` file and set your Qdrant API key in the `DB_API_KEY` property
- Place your data in the `data` folder, preferably as ".pdf" files
#### Note: ensure Qdrant and Ollama (if your LLM models point to local) are running
- Run `python main.py`

Note: This is a human-in-the-loop agent, so keep a watch on the console to pass feedback to the agent.
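If Qdrant or Ollama are not already running, one common way to start them locally (assuming Docker and the Ollama CLI are installed) is:

# start Qdrant; the API key must match DB_API_KEY in .env
docker run -p 6333:6333 -e QDRANT__SERVICE__API_KEY='th3s3cr3tk3y' qdrant/qdrant
# pull the local models referenced in .env, then serve them
ollama pull llama3.1
ollama pull nomic-embed-text
ollama serve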
@@ -0,0 +1,10 @@
python-dotenv==1.0.1
llama-index==0.11.16
llama-index-llms-openai==0.2.11
llama-index-llms-ollama==0.3.3
llama-index-embeddings-openai==0.2.5
llama-index-embeddings-ollama==0.3.1
llama-index-vector-stores-qdrant==0.3.0
qdrant-client==1.11.3
pydantic==2.9.2
deepeval==1.3.4