diff --git a/bootstraprag/cli.py b/bootstraprag/cli.py index 065bae8..58bd07f 100644 --- a/bootstraprag/cli.py +++ b/bootstraprag/cli.py @@ -22,7 +22,8 @@ def create_zip(project_name): @click.option('--observability', type=click.Choice([]), prompt=False) def create(project_name, framework, template, observability): template_choices = [] - framework_choices = ['llamaindex', 'langchain', 'standalone-qdrant', 'standalone-evaluations'] + framework_choices = ['llamaindex', 'langchain', 'standalone-qdrant', 'standalone-evaluations', 'phidata', 'crewai', + 'mem0'] framework = inquirer.select( message="Which technology would you like to use?", choices=framework_choices @@ -72,6 +73,19 @@ def create(project_name, framework, template, observability): 'phoenix-evals', 'ragas-evals' ] + elif framework == 'phidata': + template_choices = [ + 'agentic-rag' + ] + elif framework == 'mem0': + template_choices = [ + 'personal-ai-assistant-with-memory', + 'react-agent-with-memory' + ] + elif framework == 'crewai': + template_choices = [ + 'rag-with-crewai-and-llamaindex' + ] # Use InquirerPy to select template with arrow keys template = inquirer.select( message="Which template would you like to use?", diff --git a/bootstraprag/templates/crewai-agents/main.py b/bootstraprag/templates/crewai-agents/main.py deleted file mode 100644 index 95198d7..0000000 --- a/bootstraprag/templates/crewai-agents/main.py +++ /dev/null @@ -1,45 +0,0 @@ -from crewai import Agent, Task, Crew, Process, LLM - -llm = LLM(model="ollama/llama3.2:latest", base_url="http://localhost:11434", temperature=0.8, timeout=300) - -# Define the Prompt Supervisor agent -prompt_supervisor = Agent( - role='Prompt Supervisor', - goal='Ensure all agent prompts are clear, effective, and aligned with users objectives.', - backstory=( - "As a Prompt Supervisor, you have a keen eye for detail and a deep understanding " - "of effective communication strategies. Your mission is to review and refine prompts " - "to maximize the performance of AI agents." - ), - llm=llm -) - -# Define a task for the Prompt Supervisor to review and enhance the Senior Researcher's prompt -prompt_supervisor_task = Task( - description=( - "Review the prompt provided to the {topic} Senior prompt supervisor, assessing its clarity, " - "effectiveness, and alignment with the project's objectives. Provide constructive feedback " - "and suggest improvements to enhance the agent's performance." - ), - expected_output=( - "A detailed evaluation of the original prompt, including specific suggestions for improvement " - "and a revised version of the prompt that optimizes clarity and effectiveness along with few shot of examples." 
- ), - agent=prompt_supervisor -) - - -def main(): - # Forming the crew and kicking off the process - crew = Crew( - agents=[prompt_supervisor], - tasks=[prompt_supervisor_task], - process=Process.sequential, - verbose=True - ) - result = crew.kickoff(inputs={'topic': 'Get Financial data for 2023'}) - print(result) - - -if __name__ == "__main__": - main() diff --git a/bootstraprag/templates/crewai-agents/requirements.txt b/bootstraprag/templates/crewai-agents/requirements.txt deleted file mode 100644 index b9b1d3d..0000000 --- a/bootstraprag/templates/crewai-agents/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -crewai -crewai-tools \ No newline at end of file diff --git a/bootstraprag/templates/crewai-agents/__init__.py b/bootstraprag/templates/crewai/__init__.py similarity index 100% rename from bootstraprag/templates/crewai-agents/__init__.py rename to bootstraprag/templates/crewai/__init__.py diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/.env b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/.env new file mode 100644 index 0000000..1dbe14d --- /dev/null +++ b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/.env @@ -0,0 +1,25 @@ +DB_URL='http://localhost:6333' +DB_API_KEY='th3s3cr3tk3y' +COLLECTION_NAME='CREWAI_COLLECTION' + +OPENAI_API_KEY='' +OPENAI_EMBED_MODEL='text-embedding-3-small' +OPENAI_MODEL='gpt-4o' + +# use this incase you are prefering to experiment with local models. +OLLAMA_BASE_URL='http://localhost:11434' +OLLAMA_LLM_MODEL='qwen2.5:latest' +OLLAMA_EMBED_MODEL='nomic-embed-text:latest' + +# logger can be controlled usiing env +CRITICAL = 50 +FATAL = 50 +ERROR = 40 +WARNING = 30 +WARN = 30 +INFO = 20 +DEBUG = 10 +NOTSET = 0 + +LIT_SERVER_WORKERS_PER_DEVICE=2 +LIT_SERVER_PORT=8000 \ No newline at end of file diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/__init__.py b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/api_server.py b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/api_server.py new file mode 100644 index 0000000..9bebd92 --- /dev/null +++ b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/api_server.py @@ -0,0 +1,35 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from crew_agents import rag_crew +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.inputs = {'topic': ''} + + def setup(self, devices): + pass + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str, **kwargs): + try: + self.inputs['topic'] = query + return rag_crew.kickoff(inputs=self.inputs) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/client.py b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/client.py new file mode 100644 index 0000000..e396db2 --- /dev/null +++ b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/client.py @@ -0,0 +1,17 @@ +# Copyright The 
Lightning AI team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import requests
+
+# Post to the LitServe path registered in api_server.py; decode_request expects a "query" field.
+response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion", json={"query": "what are the challenges of mlops?"})
+print(f"Status: {response.status_code}\nResponse:\n {response.text}")
diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/crew_agents.py b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/crew_agents.py
new file mode 100644
index 0000000..80b1254
--- /dev/null
+++ b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/crew_agents.py
@@ -0,0 +1,84 @@
+from crewai import Agent, Task, Crew, Process, LLM
+from crewai_tools import LlamaIndexTool
+from llama_index.core.tools import FunctionTool
+from llama_index_query_engine import RagQueryEngine
+from typing import Any, Dict
+import nltk
+
+nltk.download('punkt')
+
+rag_query_engine: RagQueryEngine = RagQueryEngine(input_dir='data', show_progress=True)
+
+
+def get_valid_property(data) -> str:
+    # Return the first non-empty, non-"None" value among the expected keys.
+    for key in ("description", "text", "content"):
+        value = data.get(key)
+        if value not in (None, "None"):
+            return value
+    return None
+
+
+def use_query_engine(query: Dict):
+    """Use this function to get answers about MLOps.
+
+    Args:
+        query (Dict): the user query to search.
+ """ + query_engine = rag_query_engine.get_query_engine() + user_query = get_valid_property( + data=query + ) + return query_engine.query(str_or_query_bundle=user_query) + + +query_engine_tool = FunctionTool.from_defaults( + use_query_engine, + name="mlops tool", + description="Use this tool to lookup questions regarding mlops" +) +tool = LlamaIndexTool.from_tool(query_engine_tool) + +llm = LLM( + model="ollama/llama3.2:latest", + base_url="http://localhost:11434" +) + +# Initialize Tool from a LlamaIndex Query Engine +# query_engine = rag_query_engine.get_query_engine() +# query_tool = LlamaIndexTool.from_query_engine( +# query_engine, +# name="mlops tool", +# description="Use this tool to lookup questions regarding mlops" +# ) + +# Create and assign the tools to an agent +rag_agent = Agent( + llm=llm, + role='Senior MLops export', + goal='Provide up-to-date answer on the user query regarding {topic}', + backstory="""As an mlops expert use the tool for fetching the proper context for answering + the user query regarding {topic}""", + tools=[tool], + max_iter=10, + memory=True +) + +rag_task = Task( + description="{topic}", + expected_output="A summarizing answer for question {topic}.", + agent=rag_agent +) + +rag_crew = Crew( + agents=[rag_agent], + tasks=[rag_task], + process=Process.sequential, + verbose=True +) + +# Example of using kickoff_async +# inputs = {'topic': 'what are the challenges of mlops?'} +# output = rag_crew.kickoff(inputs=inputs) +# print(output) diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/data/mlops.pdf b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/data/mlops.pdf new file mode 100644 index 0000000..c8d8170 Binary files /dev/null and b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/data/mlops.pdf differ diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/llama_index_query_engine.py b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/llama_index_query_engine.py new file mode 100644 index 0000000..bf287b7 --- /dev/null +++ b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/llama_index_query_engine.py @@ -0,0 +1,88 @@ +from llama_index.core import ( + SimpleDirectoryReader, + VectorStoreIndex, + StorageContext, + Settings +) +from llama_index.core.base.base_query_engine import BaseQueryEngine +from llama_index.embeddings.ollama import OllamaEmbedding +from llama_index.vector_stores.qdrant import QdrantVectorStore +from llama_index.core.agent import ReActAgent +from llama_index.llms.ollama import Ollama +from llama_index.core.base.response.schema import Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse +from dotenv import load_dotenv, find_dotenv +from typing import Union +import qdrant_client +import logging +import os + +_ = load_dotenv(find_dotenv()) + +logging.basicConfig(level=int(os.environ['INFO'])) +logger = logging.getLogger(__name__) + + +class RagQueryEngine: + RESPONSE_TYPE = Union[ + Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse + ] + + def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100, + show_progress: bool = False): + self.index_loaded = False + self.similarity_top_k = similarity_top_k + self.input_dir = input_dir + self._index: VectorStoreIndex = None + self._engine = None + self.agent: ReActAgent = None + self.query_engine_tools = [] + self.show_progress = show_progress + + # use your prefered vector embeddings model + logger.info("initializing the 
OllamaEmbedding") + embed_model = OllamaEmbedding(model_name=os.environ['OLLAMA_EMBED_MODEL'], + base_url=os.environ['OLLAMA_BASE_URL']) + # openai embeddings, embedding_model_name="text-embedding-3-large" + # embed_model = OpenAIEmbedding(embed_batch_size=10, model=embedding_model_name) + + # use your prefered llm + llm = Ollama(model=os.environ['OLLAMA_LLM_MODEL'], base_url=os.environ['OLLAMA_BASE_URL'], request_timeout=600) + # llm = OpenAI(model="gpt-4o") + + logger.info("initializing the global settings") + Settings.embed_model = embed_model + Settings.llm = llm + Settings.chunk_size = chunk_size + Settings.chunk_overlap = chunk_overlap + + # Create a local Qdrant vector store + logger.info("initializing the vector store related objects") + self.client: qdrant_client.QdrantClient = qdrant_client.QdrantClient(url=os.environ['DB_URL'], + api_key=os.environ['DB_API_KEY']) + self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME']) + + self._create_index() + + def _create_index(self): + if self.client.collection_exists(collection_name=os.environ['COLLECTION_NAME']): + try: + self._index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store) + self.index_loaded = True + except Exception as e: + self.index_loaded = False + + if not self.index_loaded: + # load data + _docs = SimpleDirectoryReader(input_dir=self.input_dir).load_data(show_progress=self.show_progress) + + # build and persist index + storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + logger.info("indexing the docs in VectorStoreIndex") + self._index = VectorStoreIndex.from_documents(documents=_docs, storage_context=storage_context, + show_progress=self.show_progress) + + def get_query_engine(self) -> BaseQueryEngine: + + logger.info("creating query engine") + query_engine = self._index.as_query_engine(similarity_top_k=self.similarity_top_k, llm=Settings.llm) + return query_engine diff --git a/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/requirements.txt b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/requirements.txt new file mode 100644 index 0000000..b65ab49 --- /dev/null +++ b/bootstraprag/templates/crewai/rag_with_crewai_and_llamaindex/requirements.txt @@ -0,0 +1,8 @@ +crewai==0.86.0 +crewai-tools==0.17.0 +llama-index==0.12.5 +llama-index-vector-stores-qdrant==0.4.0 +llama-index-llms-ollama==0.5.0 +llama-index-embeddings-ollama==0.5.0 +qdrant-client==1.12.1 +litserve==0.2.5 \ No newline at end of file diff --git a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/readme.md b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/readme.md index 452a767..4c5106a 100644 --- a/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/readme.md +++ b/bootstraprag/templates/llamaindex/rag_with_sub_question_query_engine_with_observability/readme.md @@ -16,7 +16,7 @@ The Sub-Question Query Engine in LlamaIndex is designed to handle complex querie } ``` -#### How to spin observability +#### How to spin observability (langfuse) - run `docker compose -f docker-compose-langfuse.yml up` - launch langfuse in browser `http://localhost:3000` - click on `signup` diff --git a/bootstraprag/templates/mem0/__init__.py b/bootstraprag/templates/mem0/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/.env 
b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/.env new file mode 100644 index 0000000..c0a3e3c --- /dev/null +++ b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/.env @@ -0,0 +1,3 @@ +OPENAI_API_KEY=sk-proj- +LIT_SERVER_WORKERS_PER_DEVICE=2 +LIT_SERVER_PORT=8000 \ No newline at end of file diff --git a/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/__init__.py b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/api_server.py b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/api_server.py new file mode 100644 index 0000000..2139cb3 --- /dev/null +++ b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/api_server.py @@ -0,0 +1,38 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from personal_assistant import PersonalAIAssistant +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.ai_tutor: PersonalAIAssistant = None + self.user_id = "pavan_mantha" + + def setup(self, devices): + self.ai_tutor: PersonalAIAssistant = PersonalAIAssistant() + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str, **kwargs): + try: + response = self.ai_tutor.ask(question=query, user_id=self.user_id) + # you can get the memory value also. + # print(self.ai_tutor.get_memories(user_id=self.user_id)) + return response + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/client.py b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/client.py new file mode 100644 index 0000000..e396db2 --- /dev/null +++ b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/client.py @@ -0,0 +1,17 @@ +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import requests
+
+# Post to the LitServe path registered in api_server.py; decode_request expects a "query" field.
+response = requests.post("http://127.0.0.1:8000/api/v1/chat-completion", json={"query": "What is my first question?"})
+print(f"Status: {response.status_code}\nResponse:\n {response.text}")
diff --git a/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/personal_assistant.py b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/personal_assistant.py
new file mode 100644
index 0000000..24145a0
--- /dev/null
+++ b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/personal_assistant.py
@@ -0,0 +1,79 @@
+from openai import OpenAI
+from mem0 import Memory
+from dotenv import load_dotenv, find_dotenv
+import os
+
+
+load_dotenv(find_dotenv())
+
+# Set the OpenAI API key
+os.environ['OPENAI_API_KEY'] = os.environ.get("OPENAI_API_KEY")
+
+# Initialize the OpenAI client
+client = OpenAI()
+
+
+class PersonalAIAssistant:
+    def __init__(self):
+        """
+        Initialize the PersonalAIAssistant with memory configuration and OpenAI client.
+        """
+        config = {
+            "vector_store": {
+                "provider": "qdrant",
+                "config": {
+                    "collection_name": "personal_assistant_memory",
+                    "url": "http://localhost:6333",
+                    "api_key": "th3s3cr3tk3y"
+                }
+            },
+        }
+        self.memory = Memory.from_config(config)
+        self.client = client
+        self.app_id = "assistant-app"
+
+    def ask(self, question, user_id=None):
+        """
+        Ask a question to the AI and store the relevant facts in memory.
+
+        :param question: The question to ask the AI.
+        :param user_id: Optional user ID to associate with the memory.
+        """
+        # Start a streaming chat completion request to the AI
+        stream = self.client.chat.completions.create(
+            model="gpt-4o-2024-08-06",
+            stream=True,
+            messages=[
+                {"role": "system", "content": "You are a personal AI Assistant."},
+                {"role": "user", "content": question}
+            ]
+        )
+        # Store the question in memory
+        self.memory.add(question, user_id=user_id, metadata={"app_id": self.app_id})
+        response: str = ''
+        # Print the response from the AI in real-time
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                print(chunk.choices[0].delta.content, end="")
+                response = response + chunk.choices[0].delta.content
+
+        return response
+
+    def get_memories(self, user_id=None):
+        """
+        Retrieve all memories associated with the given user ID.
+
+        :param user_id: Optional user ID to filter memories.
+        :return: List of memories.
+        """
+        return self.memory.get_all(user_id=user_id)
+
+
+# Instantiate the PersonalAIAssistant
+# ai_tutor = PersonalAIAssistant()
+
+# Define a user ID
+# user_id = "pavan_mantha"
+
+# Ask a question
+# ai_tutor.ask("What is my first question.", user_id=user_id)
diff --git a/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/requirements.txt b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/requirements.txt
new file mode 100644
index 0000000..1bb3af6
--- /dev/null
+++ b/bootstraprag/templates/mem0/personal_ai_assistant_with_memory/requirements.txt
@@ -0,0 +1,4 @@
+openai==1.57.4
+mem0ai==0.1.34
+python-dotenv==1.0.1
+litserve==0.2.5
diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/.env b/bootstraprag/templates/mem0/react_agent_with_memory/.env
new file mode 100644
index 0000000..d967bcc
--- /dev/null
+++ b/bootstraprag/templates/mem0/react_agent_with_memory/.env
@@ -0,0 +1,26 @@
+QDRANT_URL='http://localhost:6333'
+QDRANT_API_KEY='th3s3cr3tk3y'
+COLLECTION_NAME='REACT_COLLECTION_WITH_MEM0'
+
+OPENAI_API_KEY=''
+OPENAI_EMBED_MODEL=''
+
+# use this in case you prefer to experiment with local models.
+OLLAMA_BASE_URL='http://localhost:11434' +OLLAMA_LLM_MODEL='llama3.2:latest' +OLLAMA_EMBED_MODEL='nomic-embed-text:latest' + +# logger can be controlled usiing env +CRITICAL = 50 +FATAL = 50 +ERROR = 40 +WARNING = 30 +WARN = 30 +INFO = 20 +DEBUG = 10 +NOTSET = 0 + +LIT_SERVER_PORT=8000 +LIT_SERVER_WORKERS_PER_DEVICE=4 + +IS_EVALUATION_NEEDED=true diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/__init__.py b/bootstraprag/templates/mem0/react_agent_with_memory/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/api_server.py b/bootstraprag/templates/mem0/react_agent_with_memory/api_server.py new file mode 100644 index 0000000..42d66bd --- /dev/null +++ b/bootstraprag/templates/mem0/react_agent_with_memory/api_server.py @@ -0,0 +1,34 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from react_agent_with_query_engine import ReActWithQueryEngine +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.react_with_engine = None + + def setup(self, devices): + self.react_with_engine = ReActWithQueryEngine(input_dir='data', show_progress=True) + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str): + try: + return self.react_with_engine.query(user_query=query) + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/client.py b/bootstraprag/templates/mem0/react_agent_with_memory/client.py new file mode 100644 index 0000000..12b71e5 --- /dev/null +++ b/bootstraprag/templates/mem0/react_agent_with_memory/client.py @@ -0,0 +1,18 @@ + +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import requests + +response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0}) +print(f"Status: {response.status_code}\nResponse:\n {response.text}") diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/data/mlops.pdf b/bootstraprag/templates/mem0/react_agent_with_memory/data/mlops.pdf new file mode 100644 index 0000000..c8d8170 Binary files /dev/null and b/bootstraprag/templates/mem0/react_agent_with_memory/data/mlops.pdf differ diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/mem0_configs.py b/bootstraprag/templates/mem0/react_agent_with_memory/mem0_configs.py new file mode 100644 index 0000000..bc227db --- /dev/null +++ b/bootstraprag/templates/mem0/react_agent_with_memory/mem0_configs.py @@ -0,0 +1,36 @@ +import os +import uuid +from dotenv import load_dotenv, find_dotenv + +load_dotenv(find_dotenv()) + +context = { + "user_id": "pavan_mantha", + "agent_id": "react_agent", + "run_id": str(uuid.uuid4()), +} + +config = { + "vector_store": { + "provider": "qdrant", + "config": { + "collection_name": os.environ.get('COLLECTION_NAME'), + "url": os.environ.get('QDRANT_URL'), + "api_key": os.environ.get('QDRANT_API_KEY'), + "embedding_model_dims": 768, # Change this according to your local model's dimensions + }, + }, + "llm": { + "provider": "ollama", + "config": { + "model": "llama3.2:latest", + "temperature": 0.2, + "max_tokens": 1500, + }, + }, + "embedder": { + "provider": "ollama", + "config": {"model": "nomic-embed-text:latest"}, + }, + "version": "v1.1", +} \ No newline at end of file diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/react_agent_with_query_engine.py b/bootstraprag/templates/mem0/react_agent_with_memory/react_agent_with_query_engine.py new file mode 100644 index 0000000..e449f6a --- /dev/null +++ b/bootstraprag/templates/mem0/react_agent_with_memory/react_agent_with_query_engine.py @@ -0,0 +1,134 @@ +from llama_index.core import ( + SimpleDirectoryReader, + VectorStoreIndex, + StorageContext, + Settings +) +from llama_index.core.tools import QueryEngineTool, ToolMetadata +from llama_index.embeddings.ollama import OllamaEmbedding +from llama_index.vector_stores.qdrant import QdrantVectorStore +from llama_index.core.agent import ReActAgent +from llama_index.llms.ollama import Ollama +from llama_index.core.base.response.schema import Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse +from dotenv import load_dotenv, find_dotenv +from typing import Union +from mem0_configs import config, context +from llama_index.memory.mem0 import Mem0Memory +import qdrant_client +import logging +import os + +_ = load_dotenv(find_dotenv()) + +logging.basicConfig(level=int(os.environ['INFO'])) +logger = logging.getLogger(__name__) + + +class ReActWithQueryEngine: + RESPONSE_TYPE = Union[ + Response, StreamingResponse, AsyncStreamingResponse, PydanticResponse + ] + + def __init__(self, input_dir: str, similarity_top_k: int = 3, chunk_size: int = 128, chunk_overlap: int = 100, + show_progress: bool = False, no_of_iterations: int = 5, required_exts: list[str] = ['.pdf', '.txt']): + self.index_loaded = False + self.similarity_top_k = similarity_top_k + self.input_dir = input_dir + self._index = None + self._engine = None + self.agent: ReActAgent = None + self.query_engine_tools = [] + self.show_progress = show_progress + self.no_of_iterations = no_of_iterations + self.required_exts = required_exts + + # the mem0 configs for llamaindex + self.memory = Mem0Memory.from_config( + context=context, + config=config, + 
search_msg_limit=10 # default is 5 + ) + + # use your prefered vector embeddings model + logger.info("initializing the OllamaEmbedding") + embed_model = OllamaEmbedding(model_name=os.environ['OLLAMA_EMBED_MODEL'], + base_url=os.environ['OLLAMA_BASE_URL']) + # openai embeddings, embedding_model_name="text-embedding-3-large" + # embed_model = OpenAIEmbedding(embed_batch_size=10, model=embedding_model_name) + + # use your prefered llm + llm = Ollama(model=os.environ['OLLAMA_LLM_MODEL'], base_url=os.environ['OLLAMA_BASE_URL'], request_timeout=600) + # llm = OpenAI(model="gpt-4o") + + logger.info("initializing the global settings") + Settings.embed_model = embed_model + Settings.llm = llm + Settings.chunk_size = chunk_size + Settings.chunk_overlap = chunk_overlap + + # Create a local Qdrant vector store + logger.info("initializing the vector store related objects") + self.client: qdrant_client.QdrantClient = qdrant_client.QdrantClient(url=os.environ['QDRANT_URL'], + api_key=os.environ['QDRANT_API_KEY']) + self.vector_store = QdrantVectorStore(client=self.client, collection_name=os.environ['COLLECTION_NAME']) + self._load_data_and_create_engine() + + def _load_data_and_create_engine(self): + if self.client.collection_exists(collection_name=os.environ['COLLECTION_NAME']): + try: + self._index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store) + self.index_loaded = True + except Exception as e: + self.index_loaded = False + + if not self.index_loaded: + # load data + _docs = (SimpleDirectoryReader(input_dir=self.input_dir, required_exts=self.required_exts) + .load_data(show_progress=self.show_progress)) + + # build and persist index + storage_context = StorageContext.from_defaults(vector_store=self.vector_store) + logger.info("indexing the docs in VectorStoreIndex") + self._index = VectorStoreIndex.from_documents(documents=_docs, storage_context=storage_context, + show_progress=self.show_progress) + + self._engine = self._index.as_query_engine(similarity_top_k=self.similarity_top_k) + self._create_query_engine_tools() + + def _create_query_engine_tools(self): + # can have more than one as per the requirement + self.query_engine_tools.append( + QueryEngineTool( + query_engine=self._engine, + metadata=ToolMetadata( + name="react_tool_engine", # change this accordingly + description=( + "Provides information about user query based on the information that you have. " + "Use a detailed plain text question as input to the tool." + ), + ), + ) + ) + self._create_react_agent() + + def _create_react_agent(self): + # [Optional] Add Context + # context = """\ + # You are a stock market sorcerer who is an expert on the companies Lyft and Uber.\ + # You will answer questions about Uber and Lyft as in the persona of a sorcerer \ + # and veteran stock market investor. 
+ # """ + self.agent = ReActAgent.from_tools( + self.query_engine_tools, + llm=Settings.llm, + verbose=True, + # context=context + max_iterations=self.no_of_iterations, + memory=self.memory + ) + + def query(self, user_query: str) -> RESPONSE_TYPE: + try: + return self.agent.query(str_or_query_bundle=user_query) + except Exception as e: + logger.error(f'Error while generating response: {e}') diff --git a/bootstraprag/templates/mem0/react_agent_with_memory/requirements.txt b/bootstraprag/templates/mem0/react_agent_with_memory/requirements.txt new file mode 100644 index 0000000..d2deece --- /dev/null +++ b/bootstraprag/templates/mem0/react_agent_with_memory/requirements.txt @@ -0,0 +1,10 @@ +python-dotenv==1.0.1 +llama-index==0.12.5 +llama-index-llms-ollama==0.5.0 +llama-index-embeddings-ollama==0.5.0 +llama-index-vector-stores-qdrant==0.4.0 +qdrant-client==1.12.1 +llama-index-memory-mem0==0.2.1 +mem0ai==0.1.34 +litserve==0.2.5 + diff --git a/bootstraprag/templates/phidata/__init__.py b/bootstraprag/templates/phidata/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/phidata/agentic_rag/.env b/bootstraprag/templates/phidata/agentic_rag/.env new file mode 100644 index 0000000..adc23b8 --- /dev/null +++ b/bootstraprag/templates/phidata/agentic_rag/.env @@ -0,0 +1,7 @@ +OPENAI_API_KEY=sk-proj- +QDRANT_API_KEY=th3s3cr3tk3y +QDRANT_URL=http://localhost:6333 +collection_name=thai-phidata-index + +LIT_SERVER_WORKERS_PER_DEVICE=2 +LIT_SERVER_PORT=8000 \ No newline at end of file diff --git a/bootstraprag/templates/phidata/agentic_rag/__init__.py b/bootstraprag/templates/phidata/agentic_rag/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bootstraprag/templates/phidata/agentic_rag/agentic_rag.py b/bootstraprag/templates/phidata/agentic_rag/agentic_rag.py new file mode 100644 index 0000000..3e5131e --- /dev/null +++ b/bootstraprag/templates/phidata/agentic_rag/agentic_rag.py @@ -0,0 +1,71 @@ +import os +import typer +import qdrant_client +from typing import Optional +from rich.prompt import Prompt + +from phi.agent import Agent +from phi.model.ollama import Ollama +from phi.embedder.ollama import OllamaEmbedder +from phi.knowledge.pdf import PDFKnowledgeBase, PDFUrlKnowledgeBase +from phi.vectordb.qdrant import Qdrant +from dotenv import load_dotenv, find_dotenv + +load_dotenv(find_dotenv()) + +api_key = os.getenv("QDRANT_API_KEY") +qdrant_url = os.getenv("QDRANT_URL") +collection_name = os.getenv("collection_name") + +qdrantClient = qdrant_client.QdrantClient(url=qdrant_url, api_key=api_key) + +vector_db = Qdrant( + collection=collection_name, + embedder=OllamaEmbedder(model="nomic-embed-text:latest", dimensions=768, host="http://localhost:11434"), + url=qdrant_url, + api_key=api_key, +) + +knowledge_base = PDFKnowledgeBase( + path="data", + vector_db=vector_db, +) + +# Comment out after first run +if not qdrantClient.collection_exists(collection_name=collection_name): + knowledge_base.load(recreate=True, upsert=True, skip_existing=True) + + +def qdrant_agent(user: str = "user"): + run_id: Optional[str] = None + + agent = Agent( + run_id=run_id, + model=Ollama(id="llama3.2:latest", host="http://localhost:11434"), + user_id=user, + knowledge=knowledge_base, + tool_calls=True, + use_tools=True, + show_tool_calls=True, + debug_mode=True, + ) + + if run_id is None: + run_id = agent.run_id + print(f"Started Run: {run_id}\n") + else: + print(f"Continuing Run: {run_id}\n") + + return agent + + +# if __name__ == "__main__": +# _agent = 
qdrant_agent('pavan_mantha') +# user_id = 'pavan_mantha' # change the user id accordingly +# while True: +# message = Prompt.ask(f"[bold] :sunglasses: {user_id} [/bold]") +# if message in ("exit", "bye"): +# break +# # _agent.print_response(message) +# resp = _agent.run(message=message) +# print(resp) diff --git a/bootstraprag/templates/phidata/agentic_rag/api_server.py b/bootstraprag/templates/phidata/agentic_rag/api_server.py new file mode 100644 index 0000000..995bcc1 --- /dev/null +++ b/bootstraprag/templates/phidata/agentic_rag/api_server.py @@ -0,0 +1,38 @@ +from abc import ABC +from dotenv import load_dotenv, find_dotenv +from agentic_rag import qdrant_agent +import litserve as ls +import os + +_ = load_dotenv(find_dotenv()) + + +class ReactRAGServingAPI(ls.LitAPI, ABC): + def __init__(self): + self.ai_agent = None + self.user_id = "pavan_mantha" + + def setup(self, devices): + self.ai_agent = qdrant_agent(user='pavan_mantha') + + def decode_request(self, request, **kwargs): + return request["query"] + + def predict(self, query: str, **kwargs): + try: + response = self.ai_agent.run(message=query) + # you can get the memory value also. + # print(self.ai_tutor.get_memories(user_id=self.user_id)) + return response + except Exception as e: + return e.args[0] + + def encode_response(self, output, **kwargs): + return {'response': output} + + +if __name__ == '__main__': + api = ReactRAGServingAPI() + server = ls.LitServer(lit_api=api, api_path='/api/v1/chat-completion', + workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE'))) + server.run(port=os.environ.get('LIT_SERVER_PORT')) diff --git a/bootstraprag/templates/phidata/agentic_rag/client.py b/bootstraprag/templates/phidata/agentic_rag/client.py new file mode 100644 index 0000000..e396db2 --- /dev/null +++ b/bootstraprag/templates/phidata/agentic_rag/client.py @@ -0,0 +1,17 @@ +# Copyright The Lightning AI team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import requests + +response = requests.post("http://127.0.0.1:8000/predict", json={"input": 4.0}) +print(f"Status: {response.status_code}\nResponse:\n {response.text}") diff --git a/bootstraprag/templates/phidata/agentic_rag/data/mlops.pdf b/bootstraprag/templates/phidata/agentic_rag/data/mlops.pdf new file mode 100644 index 0000000..c8d8170 Binary files /dev/null and b/bootstraprag/templates/phidata/agentic_rag/data/mlops.pdf differ diff --git a/bootstraprag/templates/phidata/agentic_rag/requirements.txt b/bootstraprag/templates/phidata/agentic_rag/requirements.txt new file mode 100644 index 0000000..135de53 --- /dev/null +++ b/bootstraprag/templates/phidata/agentic_rag/requirements.txt @@ -0,0 +1,4 @@ +phidata==2.7.2 +python-dotenv==1.0.1 +qdrant-client==1.12.1 +litserve==0.2.5 \ No newline at end of file
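For quick reference, each of the new templates can also be exercised directly, without the LitServe wrapper. The sketches below mirror the commented-out examples in the template sources and assume the backing services from each template's .env (Qdrant, Ollama, or an OpenAI key) are available; query strings and user IDs are only illustrative.

The CrewAI template wires the LlamaIndex query engine into a `rag_agent` and exposes a ready-made `rag_crew`; a minimal sketch of kicking it off directly, following the commented example at the bottom of crew_agents.py:

```python
# Minimal sketch: run the CrewAI RAG crew without the API server.
# Assumes Qdrant and Ollama are running as configured in the template's .env.
from crew_agents import rag_crew

result = rag_crew.kickoff(inputs={"topic": "what are the challenges of mlops?"})
print(result)
```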
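The mem0 personal assistant stores every question in a Qdrant-backed memory keyed by `user_id`, and `get_memories` returns what has accumulated for that user. A minimal sketch, mirroring the commented example in personal_assistant.py and assuming OPENAI_API_KEY is set and Qdrant is reachable on localhost:6333:

```python
# Minimal sketch: ask a question and then inspect the stored memories.
from personal_assistant import PersonalAIAssistant

assistant = PersonalAIAssistant()
user_id = "pavan_mantha"  # any stable identifier for the end user

print(assistant.ask("What is my first question?", user_id=user_id))
print(assistant.get_memories(user_id=user_id))
```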
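The phidata template's `qdrant_agent()` loads the PDF knowledge base into Qdrant on first run and returns an `Agent` bound to the local Ollama model. A minimal sketch, mirroring the commented `__main__` block in agentic_rag.py:

```python
# Minimal sketch: query the phidata agent directly.
# Assumes Qdrant and Ollama are running as configured in the template's .env.
from agentic_rag import qdrant_agent

agent = qdrant_agent(user="pavan_mantha")
resp = agent.run(message="what are the challenges of mlops?")
print(resp)
```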
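When served through the api_server.py files, all of these templates register the LitServe endpoint at api_path='/api/v1/chat-completion' and decode a JSON body via request["query"], with the port taken from LIT_SERVER_PORT (8000 by default). A hedged client sketch of that request shape:

```python
# Generic client sketch for the templates' LitServe endpoint.
# Path and payload follow api_server.py: api_path='/api/v1/chat-completion',
# decode_request reads request["query"]; the port is LIT_SERVER_PORT (8000 by default).
import requests

response = requests.post(
    "http://127.0.0.1:8000/api/v1/chat-completion",
    json={"query": "what are the challenges of mlops?"},
)
print(response.status_code, response.text)
```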