diff --git a/bootstraprag/cli.py b/bootstraprag/cli.py
index 32bdaf3..9bbd297 100644
--- a/bootstraprag/cli.py
+++ b/bootstraprag/cli.py
@@ -37,6 +37,8 @@ def create(project_name, framework, template, observability):
         'rag-with-self-correction',
         'rag-with-controllable-agents',
         'rag-with-llama-parse',
+        'rag-with-adjacent-context',
+        'rag-with-citation',
         'agents-with-introspection',
         'llama-deploy-with-simplemq',
         'llama-deploy-with-rabbitmq',
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/.env b/bootstraprag/templates/llamaindex/rag_with_citation/.env
new file mode 100644
index 0000000..493af72
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_citation/.env
@@ -0,0 +1,24 @@
+DB_URL='http://localhost:6333'
+DB_API_KEY='th3s3cr3tk3y'
+COLLECTION_NAME='CITATION_COLLECTION'
+
+OPENAI_API_KEY=''
+OPENAI_EMBED_MODEL=''
+
+# use these in case you prefer to experiment with local models.
+OLLAMA_BASE_URL='http://localhost:11434'
+OLLAMA_LLM_MODEL='llama3.2:latest'
+OLLAMA_EMBED_MODEL='nomic-embed-text:latest'
+
+# logger level can be controlled using env
+CRITICAL = 50
+FATAL = 50
+ERROR = 40
+WARNING = 30
+WARN = 30
+INFO = 20
+DEBUG = 10
+NOTSET = 0
+
+LIT_SERVER_PORT=8000
+LIT_SERVER_WORKERS_PER_DEVICE=4
\ No newline at end of file
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/__init__.py b/bootstraprag/templates/llamaindex/rag_with_citation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/api_server.py b/bootstraprag/templates/llamaindex/rag_with_citation/api_server.py
new file mode 100644
index 0000000..a8fbf74
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_citation/api_server.py
@@ -0,0 +1,35 @@
+from abc import ABC
+from rag_with_citation import CitationQueryEngineRAG
+from dotenv import load_dotenv, find_dotenv
+import litserve as ls
+import os
+
+_ = load_dotenv(find_dotenv())
+
+
+class CitationRAGAPI(ls.LitAPI, ABC):
+    def __init__(self):
+        self.citation_rag: CitationQueryEngineRAG = None
+
+    def setup(self, devices):
+        self.citation_rag = CitationQueryEngineRAG()
+
+    def decode_request(self, request, **kwargs):
+        return request["query"]
+
+    def predict(self, query: str):
+        try:
+            return self.citation_rag.query(question=query)
+        except Exception as e:
+            return e.args[0]
+
+    def encode_response(self, output, **kwargs):
+        # cast to str: the llama_index Response object is not JSON-serializable as-is
+        return {'assistant': str(output)}
+
+
+if __name__ == '__main__':
+    api = CitationRAGAPI()
+    server = ls.LitServer(lit_api=api, api_path='/api/v1/chat/completion',
+                          workers_per_device=int(os.environ.get('LIT_SERVER_WORKERS_PER_DEVICE')))
+    server.run(port=int(os.environ.get('LIT_SERVER_PORT')))
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/data/mlops.pdf b/bootstraprag/templates/llamaindex/rag_with_citation/data/mlops.pdf
new file mode 100644
index 0000000..c8d8170
Binary files /dev/null and b/bootstraprag/templates/llamaindex/rag_with_citation/data/mlops.pdf differ
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/main.py b/bootstraprag/templates/llamaindex/rag_with_citation/main.py
new file mode 100644
index 0000000..a342e6c
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_citation/main.py
@@ -0,0 +1,12 @@
+from rag_with_citation import CitationQueryEngineRAG
+
+# Example usage
+if __name__ == "__main__":
+    engine = CitationQueryEngineRAG()
+    response = engine.query("What are the benefits of MLOps?")
+    print(response)
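+
+    # Optional: inspect the citation chunks behind the answer
+    # (source_nodes is part of llama_index's Response API).
+    for source_node in response.source_nodes:
+        print(source_node.node.get_text()[:200])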
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/rag_with_citation.py b/bootstraprag/templates/llamaindex/rag_with_citation/rag_with_citation.py
new file mode 100644
index 0000000..4891fc3
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_citation/rag_with_citation.py
@@ -0,0 +1,90 @@
+import os
+from llama_index.core.query_engine import CitationQueryEngine
+from llama_index.core import (
+    VectorStoreIndex,
+    SimpleDirectoryReader,
+    StorageContext,
+    Settings
+)
+from llama_index.embeddings.ollama import OllamaEmbedding
+from llama_index.llms.ollama import Ollama
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+from dotenv import load_dotenv, find_dotenv
+import qdrant_client
+
+
+class CitationQueryEngineRAG:
+    def __init__(self, data_path="data", required_exts=None):
+        """
+        Initializes the CitationQueryEngineRAG with the data path and environment variables.
+        """
+        if required_exts is None:
+            required_exts = ['.pdf', '.txt']
+
+        load_dotenv(find_dotenv())
+
+        self.data_path = data_path
+        self.required_exts = required_exts
+
+        # Initialize settings
+        self._initialize_settings()
+
+        # Load documents
+        self.documents = self._load_documents()
+
+        # Initialize vector store
+        self.vector_store = self._initialize_vector_store()
+
+        # Create storage context
+        self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
+
+        # Create index
+        self.index = VectorStoreIndex.from_documents(
+            documents=self.documents, storage_context=self.storage_context
+        )
+
+        # Initialize query engine
+        self.query_engine = CitationQueryEngine.from_args(
+            self.index,
+            similarity_top_k=3,
+            citation_chunk_size=256,
+        )
+
+    def _initialize_settings(self):
+        """
+        Initialize LLM and embedding model settings.
+        """
+        Settings.llm = Ollama(
+            model=os.environ.get("OLLAMA_LLM_MODEL"),
+            base_url=os.environ.get("OLLAMA_BASE_URL")
+        )
+        Settings.embed_model = OllamaEmbedding(
+            model_name=os.environ.get("OLLAMA_EMBED_MODEL"),
+            base_url=os.environ.get("OLLAMA_BASE_URL")
+        )
+
+    def _load_documents(self):
+        """
+        Loads documents from the specified directory.
+        """
+        return SimpleDirectoryReader(
+            self.data_path, required_exts=self.required_exts
+        ).load_data(show_progress=True)
+
+    def _initialize_vector_store(self):
+        """
+        Initializes the Qdrant vector store.
+        """
+        client = qdrant_client.QdrantClient(
+            url=os.environ['DB_URL'], api_key=os.environ['DB_API_KEY']
+        )
+        return QdrantVectorStore(
+            client=client,
+            collection_name=os.environ['COLLECTION_NAME']
+        )
+
+    def query(self, question):
+        """
+        Queries the citation query engine and returns the cited response.
+        """
+        return self.query_engine.query(question)
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/readme.md b/bootstraprag/templates/llamaindex/rag_with_citation/readme.md
new file mode 100644
index 0000000..1bcc144
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_citation/readme.md
@@ -0,0 +1,40 @@
+
+### Project Structure
+
+```
+.
+├── api_server.py
+├── data
+│   └── mlops.pdf
+├── main.py
+├── rag_with_citation.py
+├── readme.md
+└── requirements.txt
+
+```
+### How to run?
+- `pip install -r requirements.txt`
+- edit the `.env` file accordingly
+- `python main.py`
+
+### Want to expose it as an API?
+- `python api_server.py`
+- URI: http://localhost:8000/api/v1/chat/completion
+- method: POST
+- payload:
+```json
+{
+    "query": "what are the problems of mlops"
+}
+```
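+
+A minimal Python client sketch (assumes the default host/port from `.env` and the `requests` package):
+```python
+import requests
+
+resp = requests.post(
+    "http://localhost:8000/api/v1/chat/completion",
+    json={"query": "what are the problems of mlops"},
+)
+print(resp.json()["assistant"])
+```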
diff --git a/bootstraprag/templates/llamaindex/rag_with_citation/requirements.txt b/bootstraprag/templates/llamaindex/rag_with_citation/requirements.txt
new file mode 100644
index 0000000..8d34063
--- /dev/null
+++ b/bootstraprag/templates/llamaindex/rag_with_citation/requirements.txt
@@ -0,0 +1,9 @@
+llama-index
+llama-index-llms-openai
+llama-index-llms-ollama
+llama-index-embeddings-ollama
+llama-index-embeddings-openai
+llama-index-vector-stores-qdrant
+qdrant-client
+litserve
+python-dotenv
\ No newline at end of file
diff --git a/setup.py b/setup.py
index ee4c299..a2af65b 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@

 setup(
     name='bootstrap-rag',
-    version='0.0.14',
+    version='0.0.15',
     long_description=long_description,
     long_description_content_type="text/markdown",
     packages=find_packages(),