16 changes: 15 additions & 1 deletion bootstraprag/cli.py
@@ -22,7 +22,8 @@ def create_zip(project_name):
@click.option('--observability', type=click.Choice([]), prompt=False)
def create(project_name, framework, template, observability):
template_choices = []
framework_choices = ['llamaindex', 'langchain', 'standalone-qdrant', 'standalone-evaluations']
framework_choices = ['llamaindex', 'langchain', 'standalone-qdrant', 'standalone-evaluations', 'phidata', 'crewai',
'mem0']
framework = inquirer.select(
message="Which technology would you like to use?",
choices=framework_choices
@@ -72,6 +73,19 @@ def create(project_name, framework, template, observability):
'phoenix-evals',
'ragas-evals'
]
elif framework == 'phidata':
template_choices = [
'agentic-rag'
]
elif framework == 'mem0':
template_choices = [
'personal-ai-assistant-with-memory',
'react-agent-with-memory'
]
elif framework == 'crewai':
template_choices = [
'rag-with-crewai-and-llamaindex'
]
# Use InquirerPy to select template with arrow keys
template = inquirer.select(
message="Which template would you like to use?",
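For context, a minimal sketch of how these new entries flow through the interactive prompt; it assumes InquirerPy's `inquirer.select(...).execute()` pattern, which `cli.py` already uses, and shows only the crewai branch (the rest of the CLI wiring is omitted):

```python
# Minimal sketch of the selection flow extended by this diff; assumes
# InquirerPy, which cli.py already uses. Only the crewai branch is shown.
from InquirerPy import inquirer

framework_choices = ['llamaindex', 'langchain', 'standalone-qdrant',
                     'standalone-evaluations', 'phidata', 'crewai', 'mem0']
framework = inquirer.select(
    message="Which technology would you like to use?",
    choices=framework_choices
).execute()

if framework == 'crewai':
    template_choices = ['rag-with-crewai-and-llamaindex']

template = inquirer.select(
    message="Which template would you like to use?",
    choices=template_choices
).execute()
```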
45 changes: 0 additions & 45 deletions bootstraprag/templates/crewai-agents/main.py

This file was deleted.

2 changes: 0 additions & 2 deletions bootstraprag/templates/crewai-agents/requirements.txt

This file was deleted.

25 changes: 25 additions & 0 deletions bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/.env
@@ -0,0 +1,25 @@
DB_URL='http://localhost:6333'
DB_API_KEY='th3s3cr3tk3y'
COLLECTION_NAME='CREWAI_COLLECTION'

OPENAI_API_KEY=''
OPENAI_EMBED_MODEL='text-embedding-3-small'
OPENAI_MODEL='gpt-4o'

# use this in case you prefer to experiment with local models.
OLLAMA_BASE_URL='http://localhost:11434'
OLLAMA_LLM_MODEL='qwen2.5:latest'
OLLAMA_EMBED_MODEL='nomic-embed-text:latest'

# logger level can be controlled using these env values
CRITICAL = 50
FATAL = 50
ERROR = 40
WARNING = 30
WARN = 30
INFO = 20
DEBUG = 10
NOTSET = 0

LIT_SERVER_WORKERS_PER_DEVICE=2
LIT_SERVER_PORT=8000
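The numeric entries above are standard Python logging levels; the query-engine module later in this diff reads one of them as its log level. A minimal sketch of that pattern, assuming python-dotenv is installed:

```python
# Minimal sketch: load the .env above and use its numeric INFO entry as the
# log level; mirrors the pattern in llama_index_query_engine.py below.
import logging
import os

from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())

logging.basicConfig(level=int(os.environ['INFO']))  # INFO=20 -> logging.INFO
logger = logging.getLogger(__name__)
logger.info("Qdrant endpoint: %s", os.environ['DB_URL'])
```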
Empty file.
@@ -0,0 +1,35 @@
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from crew_agents import rag_crew
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
def __init__(self):
self.inputs = {'topic': ''}

def setup(self, devices):
pass

def decode_request(self, request, **kwargs):
return request["query"]

def predict(self, query: str, **kwargs):
try:
self.inputs['topic'] = query
return rag_crew.kickoff(inputs=self.inputs)
except Exception as e:
return e.args[0]

def encode_response(self, output, **kwargs):
return {'response': output}


if __name__ == '__main__':
api = ReactRAGServingAPI()
server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT')))
@@ -0,0 +1,17 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests

response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion",
                         json={"query": "what are the challenges of mlops?"})
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
@@ -0,0 +1,84 @@
from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import LlamaIndexTool
from llama_index.core.tools import FunctionTool
from llama_index_query_engine import RagQueryEngine
from typing import Any, Dict
import nltk

nltk.download('punkt')

rag_query_engine: RagQueryEngine = RagQueryEngine(input_dir='data', show_progress=True)


def get_valid_property(data: Dict) -> str:
    # Return the first non-empty, non-"None" value among the known keys
    for key in ("description", "text", "content"):
        value = data.get(key)
        if value not in (None, "", "None"):
            return value
    return ""


def use_query_engine(query: Dict):
"""Use this function to get answers for mlops

Args:
query (Dict): the user query to search.
"""
query_engine = rag_query_engine.get_query_engine()
user_query = get_valid_property(
data=query
)
return query_engine.query(str_or_query_bundle=user_query)


query_engine_tool = FunctionTool.from_defaults(
use_query_engine,
name="mlops tool",
description="Use this tool to lookup questions regarding mlops"
)
tool = LlamaIndexTool.from_tool(query_engine_tool)

llm = LLM(
model="ollama/llama3.2:latest",
base_url="http://localhost:11434"
)

# Initialize Tool from a LlamaIndex Query Engine
# query_engine = rag_query_engine.get_query_engine()
# query_tool = LlamaIndexTool.from_query_engine(
# query_engine,
# name="mlops tool",
# description="Use this tool to lookup questions regarding mlops"
# )

# Create and assign the tools to an agent
rag_agent = Agent(
llm=llm,
    role='Senior MLOps expert',
goal='Provide up-to-date answer on the user query regarding {topic}',
    backstory="""As an MLOps expert, use the tool to fetch the proper context for answering
    the user query regarding {topic}""",
tools=[tool],
max_iter=10,
memory=True
)

rag_task = Task(
description="{topic}",
expected_output="A summarizing answer for question {topic}.",
agent=rag_agent
)

rag_crew = Crew(
agents=[rag_agent],
tasks=[rag_task],
process=Process.sequential,
verbose=True
)

# Example of standalone (synchronous) usage
# inputs = {'topic': 'what are the challenges of mlops?'}
# output = rag_crew.kickoff(inputs=inputs)
# print(output)
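If the crew should be kicked off without blocking the caller (for example from an async web handler), a non-blocking variant is sketched below; it assumes `Crew.kickoff_async` is available in the pinned crewai release:

```python
# Non-blocking variant of the example above; assumes Crew.kickoff_async is
# available in the pinned crewai release (0.86.0).
import asyncio

from crew_agents import rag_crew


async def main() -> None:
    inputs = {'topic': 'what are the challenges of mlops?'}
    output = await rag_crew.kickoff_async(inputs=inputs)
    print(output)


if __name__ == '__main__':
    asyncio.run(main())
```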
Binary file not shown.
@@ -0,0 +1,88 @@
from llama_index.core import (
SimpleDirectoryReader,
VectorStoreIndex,
StorageContext,
Settings
)
from llama_index.core.base.base_query_engine import BaseQueryEngine
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.agent import ReActAgent
from llama_index.llms.ollama import Ollama
from llama_index.core.base.response.schema import Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
from dotenv import load_dotenv, find_dotenv
from typing import Union
import qdrant_client
import logging
import os

_ = load_dotenv(find_dotenv())

# numeric log level (e.g. INFO=20) is read from the .env file
logging.basicConfig(level=int(os.environ['INFO']))
logger = logging.getLogger(__name__)


class RagQueryEngine:
RESPONSE_TYPE = Union[
Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse
]

def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100,
show_progress: bool = False):
self.index_loaded = False
self.similarity_top_k = similarity_top_k
self.input_dir = input_dir
self._index: VectorStoreIndex = None
self._engine = None
self.agent: ReActAgent = None
self.query_engine_tools = []
self.show_progress = show_progress

        # use your preferred vector embeddings model
logger.info("initializing the OllamaEmbedding")
embed_model = OllamaEmbedding(model_name=os.environ['OLLAMA_EMBED_MODEL'],
base_url=os.environ['OLLAMA_BASE_URL'])
# openai embeddings, embedding_model_name="text-embedding-3-large"
# embed_model = OpenAIEmbedding(embed_batch_size=10, model=embedding_model_name)

        # use your preferred LLM
llm = Ollama(model=os.environ['OLLAMA_LLM_MODEL'], base_url=os.environ['OLLAMA_BASE_URL'], request_timeout=600)
# llm = OpenAI(model="gpt-4o")

logger.info("initializing the global settings")
Settings.embed_model = embed_model
Settings.llm = llm
Settings.chunk_size = chunk_size
Settings.chunk_overlap = chunk_overlap

# Create a local Qdrant vector store
logger.info("initializing the vector store related objects")
self.client: qdrant_client.QdrantClient = qdrant_client.QdrantClient(url=os.environ['DB_URL'],
api_key=os.environ['DB_API_KEY'])
self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME'])

self._create_index()

def _create_index(self):
if self.client.collection_exists(collection_name=os.environ['COLLECTION_NAME']):
try:
self._index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store)
self.index_loaded = True
            except Exception as e:
                logger.warning("failed to load the existing index from Qdrant: %s", e)
                self.index_loaded = False

if not self.index_loaded:
# load data
_docs = SimpleDirectoryReader(input_dir=self.input_dir).load_data(show_progress=self.show_progress)

# build and persist index
storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
logger.info("indexing the docs in VectorStoreIndex")
self._index = VectorStoreIndex.from_documents(documents=_docs, storage_context=storage_context,
show_progress=self.show_progress)

def get_query_engine(self) -> BaseQueryEngine:

logger.info("creating query engine")
query_engine = self._index.as_query_engine(similarity_top_k=self.similarity_top_k, llm=Settings.llm)
return query_engine
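For a quick local check outside the crew, the engine can be exercised directly; a minimal sketch, assuming a running Qdrant and Ollama and a `data/` directory with documents:

```python
# Minimal standalone usage sketch for RagQueryEngine; assumes the .env above,
# a running Qdrant instance, and a local Ollama serving the configured models.
from llama_index_query_engine import RagQueryEngine

rag = RagQueryEngine(input_dir='data', show_progress=True)
query_engine = rag.get_query_engine()
response = query_engine.query("what are the challenges of mlops?")
print(response)
```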
@@ -0,0 +1,8 @@
crewai==0.86.0
crewai-tools==0.17.0
llama-index==0.12.5
llama-index-vector-stores-qdrant==0.4.0
llama-index-llms-ollama==0.5.0
llama-index-embeddings-ollama==0.5.0
qdrant-client==1.12.1
litserve==0.2.5
@@ -16,7 +16,7 @@ The Sub-Question Query Engine in LlamaIndex is designed to handle complex queries
}
```

#### How to spin observability
#### How to spin observability (langfuse)
- run `docker compose -f docker-compose-langfuse.yml up`
- launch langfuse in browser `http://localhost:3000`
- click on `signup`
Empty file.
@@ -0,0 +1,3 @@
OPENAI_API_KEY=sk-proj-
LIT_SERVER_WORKERS_PER_DEVICE=2
LIT_SERVER_PORT=8000
@@ -0,0 +1,38 @@
from abc import ABC
from dotenv import load_dotenv, find_dotenv
from personal_assistant import PersonalAIAssistant
import litserve as ls
import os

_ = load_dotenv(find_dotenv())


class ReactRAGServingAPI(ls.LitAPI, ABC):
def __init__(self):
self.ai_tutor: PersonalAIAssistant = None
self.user_id = "pavan_mantha"

def setup(self, devices):
self.ai_tutor: PersonalAIAssistant = PersonalAIAssistant()

def decode_request(self, request, **kwargs):
return request["query"]

def predict(self, query: str, **kwargs):
try:
response = self.ai_tutor.ask(question=query, user_id=self.user_id)
# you can get the memory value also.
# print(self.ai_tutor.get_memories(user_id=self.user_id))
return response
except Exception as e:
return e.args[0]

def encode_response(self, output, **kwargs):
return {'response': output}


if __name__ == '__main__':
api = ReactRAGServingAPI()
server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion',
workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
    server.run(port=int(os.environ.get('LIT_SERVER_PORT')))
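The endpoint can then be exercised with a plain HTTP client; a minimal sketch, assuming the server is running locally on the port from the .env (the question text is only illustrative):

```python
# Minimal client sketch for the LitServe endpoint defined above; assumes the
# server is running locally on port 8000 (LIT_SERVER_PORT).
import requests

response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"query": "Remind me what I asked you about LitServe yesterday."},
)
print(f"Status: {response.status_code}\nResponse:\n {response.json()}")
```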