diff --git a/dspy/retrieve/llama_index_rm.py b/dspy/retrieve/llama_index_rm.py
new file mode 100644
index 0000000000..639d849ba8
--- /dev/null
+++ b/dspy/retrieve/llama_index_rm.py
@@ -0,0 +1,104 @@
+import logging
+from typing import Optional
+
+import dspy
+
+try:
+    from llama_index.core.base.base_retriever import BaseRetriever
+except ImportError:
+    err = "The 'llama_index' package is required to use LlamaIndexRM. Install it with 'pip install llama_index'."
+    raise ImportError(err) from None
+
+logger = logging.getLogger(__name__)
+
+NO_TOP_K_WARNING = "The underlying LlamaIndex retriever does not support top k retrieval. Ignoring k value."
+
+
+class LlamaIndexRM(dspy.Retrieve):
+    """Implements a retriever which wraps over a LlamaIndex retriever.
+
+    This is done to bridge LlamaIndex and DSPy and allow the various retrieval
+    abstractions in LlamaIndex to be used in DSPy.
+
+    To-do (maybe):
+        - Async support (DSPy lacks this entirely it seems, so not a priority until the rest of the repo catches on)
+        - Non-text (e.g. image/video) retrieval (Available in LI, not sure if this will be a priority in DSPy)
+
+    Args:
+        retriever (BaseRetriever): A LlamaIndex retriever object - text based only
+        k (int): Optional; the number of examples to retrieve (similarity_top_k).
+            A falsy value (None or 0) keeps the retriever's own setting.
+
+    If the underlying LI retriever does not have the property similarity_top_k, k will be ignored.
+
+    Returns:
+        DSPy RM Object - this is a retriever object that can be used in DSPy
+    """
+
+    retriever: BaseRetriever
+
+    def __init__(
+        self,
+        retriever: BaseRetriever,
+        k: Optional[int] = None,
+    ):
+        self.retriever = retriever
+
+        # Only override the retriever's configured top k when a truthy k is given.
+        if k:
+            self.k = k
+
+    @property
+    def k(self) -> Optional[int]:
+        """Get similarity top k of retriever (None, with a warning, if unsupported)."""
+        if not hasattr(self.retriever, "similarity_top_k"):
+            logger.warning(NO_TOP_K_WARNING)
+            return None
+
+        return self.retriever.similarity_top_k
+
+    @k.setter
+    def k(self, k: int) -> None:
+        """Set similarity top k of retriever (warns and does nothing if unsupported)."""
+        if hasattr(self.retriever, "similarity_top_k"):
+            self.retriever.similarity_top_k = k
+        else:
+            logger.warning(NO_TOP_K_WARNING)
+
+    def forward(self, query: str, k: Optional[int] = None) -> list[dspy.Example]:
+        """Forward function for the LI retriever.
+
+        Retrieves the top k examples for a given query. The default top k is set via the
+        ``k`` setter or at LI instantiation; a ``k`` passed here applies to this call only.
+
+        Args:
+            query (str): The query to retrieve examples for
+            k (int): Optional; the number of examples to retrieve (similarity_top_k) for this call.
+                A falsy value (None or 0) uses the retriever's current setting.
+
+        If the underlying LI retriever does not have the property similarity_top_k, k will be ignored.
+
+        Returns:
+            List[dspy.Example]: A list of examples retrieved by the retriever
+        """
+        if not k:
+            raw = self.retriever.retrieve(query)
+        elif not hasattr(self.retriever, "similarity_top_k"):
+            logger.warning(NO_TOP_K_WARNING)
+            raw = self.retriever.retrieve(query)
+        else:
+            previous_k = self.retriever.similarity_top_k
+            self.retriever.similarity_top_k = k
+            try:
+                raw = self.retriever.retrieve(query)
+            finally:
+                # Restore the configured top k so a per-call override does not leak into later calls.
+                self.retriever.similarity_top_k = previous_k
+
+        return [
+            dspy.Example(
+                text=result.text,
+                score=result.score,
+            )
+            for result in raw
+        ]
diff --git a/poetry.lock b/poetry.lock
index 7d9f6b8516..294d720682 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -201,7 +201,7 @@ trio = ["trio (>=0.23)"]
 name = "appnope"
 version = "0.1.4"
 description = "Disable App Nap on macOS >= 10.9"
-optional = true
+optional = false
 python-versions = ">=3.6"
 files = [
     {file = "appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"},
@@ -297,7 +297,7 @@ files = [
 name = "asttokens"
 version = "2.4.1"
 description = "Annotate AST trees with source code positions"
-optional = true
+optional = false
 python-versions = "*"
 files = [
     {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"},
@@ -651,7 +651,7 @@ files = [
 name = "cffi"
 version = "1.16.0"
 description = "Foreign Function Interface for Python calling C code."
-optional = true
+optional = false
 python-versions = ">=3.8"
 files = [
     {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"},
@@ -973,7 +973,7 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"]
 name = "comm"
 version = "0.2.2"
 description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc."
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, @@ -1040,6 +1040,21 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "dataclasses-json" +version = "0.6.5" +description = "Easily serialize dataclasses to and from JSON." +optional = true +python-versions = "<4.0,>=3.7" +files = [ + {file = "dataclasses_json-0.6.5-py3-none-any.whl", hash = "sha256:f49c77aa3a85cac5bf5b7f65f4790ca0d2be8ef4d92c75e91ba0103072788a39"}, + {file = "dataclasses_json-0.6.5.tar.gz", hash = "sha256:1c287594d9fcea72dc42d6d3836cf14848c2dc5ce88f65ed61b36b57f515fe26"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "datasets" version = "2.18.0" @@ -1088,7 +1103,7 @@ vision = ["Pillow (>=6.2.1)"] name = "debugpy" version = "1.8.1" description = "An implementation of the Debug Adapter Protocol for Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "debugpy-1.8.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741"}, @@ -1119,7 +1134,7 @@ files = [ name = "decorator" version = "5.1.1" description = "Decorators for Humans" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, @@ -1158,6 +1173,17 @@ files = [ graph = ["objgraph (>=1.7.2)"] profile = ["gprof2dot (>=2022.7.29)"] +[[package]] +name = "dirtyjson" +version = "1.0.8" +description = "JSON decoder for Python that can extract data from the muck" +optional = true +python-versions = "*" +files = [ + {file = "dirtyjson-1.0.8-py3-none-any.whl", hash = 
"sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"}, + {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"}, +] + [[package]] name = "distlib" version = "0.3.8" @@ -1265,7 +1291,7 @@ test = ["pytest (>=6)"] name = "executing" version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, @@ -2124,7 +2150,7 @@ files = [ name = "ipykernel" version = "6.29.4" description = "IPython Kernel for Jupyter" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "ipykernel-6.29.4-py3-none-any.whl", hash = "sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da"}, @@ -2157,7 +2183,7 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio name = "ipython" version = "8.18.1" description = "IPython: Productive Interactive Computing" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, @@ -2208,7 +2234,7 @@ six = "*" name = "jedi" version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." 
-optional = true +optional = false python-versions = ">=3.6" files = [ {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, @@ -2328,7 +2354,7 @@ testing = ["coverage", "ipykernel", "jupytext", "matplotlib", "nbdime", "nbforma name = "jupyter-client" version = "8.6.1" description = "Jupyter protocol implementation and client libraries" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jupyter_client-8.6.1-py3-none-any.whl", hash = "sha256:3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f"}, @@ -2351,7 +2377,7 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt name = "jupyter-core" version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, @@ -2393,6 +2419,300 @@ websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" [package.extras] adal = ["adal (>=1.0.2)"] +[[package]] +name = "llama-index" +version = "0.10.35" +description = "Interface between LLMs and your data" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index-0.10.35-py3-none-any.whl", hash = "sha256:1e30f7dceff5e05cb9bfe8727b767487dfe4f97dc2c71a4ca8276aa983dc6e9e"}, + {file = "llama_index-0.10.35.tar.gz", hash = "sha256:cf1b9ac3b65cc4fd035bfeb0010353a65403aa202d830e21dc7beda7e6284e62"}, +] + +[package.dependencies] +llama-index-agent-openai = ">=0.1.4,<0.3.0" +llama-index-cli = ">=0.1.2,<0.2.0" +llama-index-core = ">=0.10.35,<0.11.0" +llama-index-embeddings-openai = ">=0.1.5,<0.2.0" +llama-index-indices-managed-llama-cloud = ">=0.1.2,<0.2.0" +llama-index-legacy = ">=0.9.48,<0.10.0" +llama-index-llms-openai = ">=0.1.13,<0.2.0" +llama-index-multi-modal-llms-openai = 
">=0.1.3,<0.2.0" +llama-index-program-openai = ">=0.1.3,<0.2.0" +llama-index-question-gen-openai = ">=0.1.2,<0.2.0" +llama-index-readers-file = ">=0.1.4,<0.2.0" +llama-index-readers-llama-parse = ">=0.1.2,<0.2.0" + +[[package]] +name = "llama-index-agent-openai" +version = "0.2.4" +description = "llama-index agent openai integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_agent_openai-0.2.4-py3-none-any.whl", hash = "sha256:b05eb7f0331d40a7a2bcaabaa84c9c7ebe6837a72038d03cbb71c083a4301a81"}, + {file = "llama_index_agent_openai-0.2.4.tar.gz", hash = "sha256:cd4a58f8bf233728ceda554cbb34de56a2b6bbbbff6ce801c3f8ff0c8280bf55"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.35,<0.11.0" +llama-index-llms-openai = ">=0.1.5,<0.2.0" +openai = ">=1.14.0" + +[[package]] +name = "llama-index-cli" +version = "0.1.12" +description = "llama-index cli" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_cli-0.1.12-py3-none-any.whl", hash = "sha256:d80d546786f02d3f16f6183b8e86b22b8b5c33a1500923659f2ccbff8d5df634"}, + {file = "llama_index_cli-0.1.12.tar.gz", hash = "sha256:3cf1f706c3c69c6b1aab07fca7faad3959db1709808efd50491b669d38b0b580"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.11.post1,<0.11.0" +llama-index-embeddings-openai = ">=0.1.1,<0.2.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-core" +version = "0.10.35.post1" +description = "Interface between LLMs and your data" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_core-0.10.35.post1-py3-none-any.whl", hash = "sha256:1c5993946202a9aec86bd6f0943991d1fe443556bd3e6c7b345cb360a46dc6c2"}, + {file = "llama_index_core-0.10.35.post1.tar.gz", hash = "sha256:f62013217bf7c04b6adf9dc2c1b168ff957f924519f19af2f383a0f0c34308e4"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = ">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" 
+fsspec = ">=2023.5.0" +httpx = "*" +llamaindex-py-client = ">=0.1.18,<0.2.0" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">=3.8.1,<4.0.0" +numpy = "*" +openai = ">=1.1.0" +pandas = "*" +pillow = ">=9.0.0" +PyYAML = ">=6.0.1" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<9.0.0" +tiktoken = ">=0.3.3" +tqdm = ">=4.66.1,<5.0.0" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" +wrapt = "*" + +[package.extras] +gradientai = ["gradientai (>=1.4.0)"] +html = ["beautifulsoup4 (>=4.12.2,<5.0.0)"] +langchain = ["langchain (>=0.0.303)"] +local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.33.1,<5.0.0)"] +postgres = ["asyncpg (>=0.29.0,<0.30.0)", "pgvector (>=0.2.4,<0.3.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] + +[[package]] +name = "llama-index-embeddings-openai" +version = "0.1.9" +description = "llama-index embeddings openai integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_embeddings_openai-0.1.9-py3-none-any.whl", hash = "sha256:fbd16d6197b91f4dbdc6d0707e573cc224ac2b0a48d5b370c6232dd8a2282473"}, + {file = "llama_index_embeddings_openai-0.1.9.tar.gz", hash = "sha256:0fd292b2f9a0ad4534a790d6374726bc885853188087eb018167dcf239643924"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" + +[[package]] +name = "llama-index-indices-managed-llama-cloud" +version = "0.1.6" +description = "llama-index indices llama-cloud integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_indices_managed_llama_cloud-0.1.6-py3-none-any.whl", hash = "sha256:cba33e1a3677b2a2ae7f239119acbf6dc3818f105edc92315729842b56fbc949"}, + {file = 
"llama_index_indices_managed_llama_cloud-0.1.6.tar.gz", hash = "sha256:74b3b0e9ebf9d348d3054f9fc0c657031acceb9351c31116ad8d5a7ae4729f5c"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.0,<0.11.0" +llamaindex-py-client = ">=0.1.19,<0.2.0" + +[[package]] +name = "llama-index-legacy" +version = "0.9.48" +description = "Interface between LLMs and your data" +optional = true +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_legacy-0.9.48-py3-none-any.whl", hash = "sha256:714ada95beac179b4acefa4d2deff74bb7b2f22b0f699ac247d4cb67738d16d4"}, + {file = "llama_index_legacy-0.9.48.tar.gz", hash = "sha256:82ddc4691edbf49533d65582c249ba22c03fe96fbd3e92f7758dccef28e43834"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = ">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" +fsspec = ">=2023.5.0" +httpx = "*" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">=3.8.1,<4.0.0" +numpy = "*" +openai = ">=1.1.0" +pandas = "*" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<9.0.0" +tiktoken = ">=0.3.3" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" + +[package.extras] +gradientai = ["gradientai (>=1.4.0)"] +html = ["beautifulsoup4 (>=4.12.2,<5.0.0)"] +langchain = ["langchain (>=0.0.303)"] +local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.33.1,<5.0.0)"] +postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] + +[[package]] +name = "llama-index-llms-openai" +version = "0.1.18" +description = "llama-index llms openai integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = 
"llama_index_llms_openai-0.1.18-py3-none-any.whl", hash = "sha256:934cf72d10385f1c76c0183b0e94ce1850fab1026287e01b7db0a14c946dfd79"}, + {file = "llama_index_llms_openai-0.1.18.tar.gz", hash = "sha256:8cb7546a1885ba558ff580b114d638569a0aed81a264961114e719bc42b37100"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.24,<0.11.0" + +[[package]] +name = "llama-index-multi-modal-llms-openai" +version = "0.1.5" +description = "llama-index multi-modal-llms openai integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_multi_modal_llms_openai-0.1.5-py3-none-any.whl", hash = "sha256:bb332580e7e4b5f2f87488b3649d2ceb53ee82c848e59694578a982c3982ce0b"}, + {file = "llama_index_multi_modal_llms_openai-0.1.5.tar.gz", hash = "sha256:9a237f4f886d1e20c27e9493e80b3e1f8753859481ff1b58fe25b7aa39b198a2"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-program-openai" +version = "0.1.6" +description = "llama-index program openai integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_program_openai-0.1.6-py3-none-any.whl", hash = "sha256:4660b338503537c5edca1e0dab606af6ce372b4f1b597e2833c6b602447c5d8d"}, + {file = "llama_index_program_openai-0.1.6.tar.gz", hash = "sha256:c6a4980c5ea826088b28b4dee3367edb20221e6d05eb0e05019049190131d772"}, +] + +[package.dependencies] +llama-index-agent-openai = ">=0.1.1,<0.3.0" +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-question-gen-openai" +version = "0.1.3" +description = "llama-index question_gen openai integration" +optional = true +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_question_gen_openai-0.1.3-py3-none-any.whl", hash = "sha256:1f83b49e8b2e665030d1ec8c54687d6985d9fa8426147b64e46628a9e489b302"}, + {file = "llama_index_question_gen_openai-0.1.3.tar.gz", hash = 
"sha256:4486198117a45457d2e036ae60b93af58052893cc7d78fa9b6f47dd47b81e2e1"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" +llama-index-program-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-readers-file" +version = "0.1.22" +description = "llama-index readers file integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_file-0.1.22-py3-none-any.whl", hash = "sha256:a8d4a69a9ea659c14ebb22ca9a5560b9c7ec6f501e7f68f6c52f591374165376"}, + {file = "llama_index_readers_file-0.1.22.tar.gz", hash = "sha256:37de54ad0cfbdc607c195532b9a292417a4714f57773570b87027b8dc381f0e2"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.12.3,<5.0.0" +llama-index-core = ">=0.10.1,<0.11.0" +pypdf = ">=4.0.1,<5.0.0" +striprtf = ">=0.0.26,<0.0.27" + +[package.extras] +pymupdf = ["pymupdf (>=1.23.21,<2.0.0)"] + +[[package]] +name = "llama-index-readers-llama-parse" +version = "0.1.4" +description = "llama-index readers llama-parse integration" +optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_llama_parse-0.1.4-py3-none-any.whl", hash = "sha256:c4914b37d12cceee56fbd185cca80f87d60acbf8ea7a73f9719610180be1fcdd"}, + {file = "llama_index_readers_llama_parse-0.1.4.tar.gz", hash = "sha256:78608b193c818894aefeee0aa303f02b7f80f2e4caf13866c2fd3b0b1023e2c0"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.7,<0.11.0" +llama-parse = ">=0.4.0,<0.5.0" + +[[package]] +name = "llama-parse" +version = "0.4.2" +description = "Parse files into RAG-Optimized formats." 
+optional = true +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_parse-0.4.2-py3-none-any.whl", hash = "sha256:5ce0390141f216dcd88c1123fea7f2a4f561d177f791a97217a3db3509dec4ff"}, + {file = "llama_parse-0.4.2.tar.gz", hash = "sha256:fa04c09730b102155f6505de9cf91998c86d334581f0f12597c5eb47ca5db859"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.29" + +[[package]] +name = "llamaindex-py-client" +version = "0.1.19" +description = "" +optional = true +python-versions = "<4,>=3.8" +files = [ + {file = "llamaindex_py_client-0.1.19-py3-none-any.whl", hash = "sha256:fd9416fd78b97209bf323bc3c7fab314499778563e7274f10853ad560563d10e"}, + {file = "llamaindex_py_client-0.1.19.tar.gz", hash = "sha256:73f74792bb8c092bae6dc626627a09ac13a099fa8d10f8fcc83e17a2b332cca7"}, +] + +[package.dependencies] +httpx = ">=0.20.0" +pydantic = ">=1.10" + [[package]] name = "loguru" version = "0.7.2" @@ -2595,7 +2915,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "matplotlib-inline" version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, @@ -3235,7 +3555,7 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] name = "nest-asyncio" version = "1.6.0" description = "Patch asyncio to allow nested event loops" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, @@ -3260,6 +3580,31 @@ doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9. 
extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +optional = true +python-versions = ">=3.7" +files = [ + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + [[package]] name = "nodeenv" version = "1.8.0" @@ -3942,7 +4287,7 @@ xml = ["lxml (>=4.9.2)"] name = "parso" version = "0.8.4" description = "A Python Parser" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, @@ -3968,7 +4313,7 @@ files = [ name = "pexpect" version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." 
-optional = true +optional = false python-versions = "*" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, @@ -3991,6 +4336,92 @@ files = [ [package.dependencies] numpy = "*" +[[package]] +name = "pillow" +version = "10.3.0" +description = "Python Imaging Library (Fork)" +optional = true +python-versions = ">=3.8" +files = [ + {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"}, + {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf"}, + {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3"}, + {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5"}, + {file = "pillow-10.3.0-cp310-cp310-win32.whl", hash = "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2"}, + {file = "pillow-10.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f"}, + {file = "pillow-10.3.0-cp310-cp310-win_arm64.whl", hash = 
"sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b"}, + {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"}, + {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd"}, + {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad"}, + {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c"}, + {file = "pillow-10.3.0-cp311-cp311-win32.whl", hash = "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09"}, + {file = "pillow-10.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d"}, + {file = "pillow-10.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f"}, + {file = "pillow-10.3.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84"}, + {file = "pillow-10.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19"}, + {file 
= "pillow-10.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338"}, + {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1"}, + {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462"}, + {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a"}, + {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef"}, + {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3"}, + {file = "pillow-10.3.0-cp312-cp312-win32.whl", hash = "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d"}, + {file = "pillow-10.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b"}, + {file = "pillow-10.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a"}, + {file = "pillow-10.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b"}, + {file = "pillow-10.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2"}, + {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa"}, + {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383"}, + {file = 
"pillow-10.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d"}, + {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd"}, + {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d"}, + {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3"}, + {file = "pillow-10.3.0-cp38-cp38-win32.whl", hash = "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b"}, + {file = "pillow-10.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999"}, + {file = "pillow-10.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936"}, + {file = "pillow-10.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8"}, + {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9"}, + {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb"}, + {file = "pillow-10.3.0-cp39-cp39-win32.whl", hash = "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572"}, + {file = "pillow-10.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb"}, + {file = "pillow-10.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591"}, + {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "pinecone-client" version = "2.2.4" @@ -4111,7 +4542,7 @@ virtualenv = ">=20.10.0" name = "prompt-toolkit" version = "3.0.43" description = "Library for building powerful interactive command lines in Python" -optional = true +optional = false python-versions = ">=3.7.0" files = [ {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"}, @@ -4145,7 +4576,7 @@ files = [ name = "psutil" version = "5.9.8" description = "Cross-platform lib for process and system monitoring in Python." 
-optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, @@ -4195,7 +4626,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -optional = true +optional = false python-versions = "*" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -4253,7 +4684,7 @@ functions = ["apache-bookkeeper-client (>=4.16.1)", "grpcio (>=1.60.0)", "promet name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" -optional = true +optional = false python-versions = "*" files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, @@ -4362,7 +4793,7 @@ pyasn1 = ">=0.4.6,<0.7.0" name = "pycparser" version = "2.22" description = "C parser in Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, @@ -4648,6 +5079,27 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pypdf" +version = "4.2.0" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = true +python-versions = ">=3.6" +files = [ + {file = "pypdf-4.2.0-py3-none-any.whl", hash = "sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1"}, + {file = "pypdf-4.2.0.tar.gz", hash = "sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1"}, +] + +[package.dependencies] +typing_extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + +[package.extras] +crypto = 
["PyCryptodome", "cryptography"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow (>=8.0.0)", "PyCryptodome", "cryptography"] +image = ["Pillow (>=8.0.0)"] + [[package]] name = "pypika" version = "0.48.9" @@ -4778,7 +5230,7 @@ files = [ name = "pywin32" version = "306" description = "Python for Window Extensions" -optional = true +optional = false python-versions = "*" files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, @@ -4875,7 +5327,7 @@ pyyaml = "*" name = "pyzmq" version = "26.0.3" description = "Python bindings for 0MQ" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:44dd6fc3034f1eaa72ece33588867df9e006a7303725a12d64c3dff92330f625"}, @@ -5962,7 +6414,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", optional = true, markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\" or extra == \"asyncio\""} typing-extensions = ">=4.6.0" [package.extras] @@ -5994,7 +6446,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -optional = true +optional = false python-versions = "*" files = [ {file = 
"stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, @@ -6027,6 +6479,17 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\"" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +[[package]] +name = "striprtf" +version = "0.0.26" +description = "A simple library to convert rtf to text" +optional = true +python-versions = "*" +files = [ + {file = "striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb"}, + {file = "striprtf-0.0.26.tar.gz", hash = "sha256:fdb2bba7ac440072d1c41eab50d8d74ae88f60a8b6575c6e2c7805dc462093aa"}, +] + [[package]] name = "structlog" version = "24.1.0" @@ -6099,6 +6562,58 @@ files = [ [package.extras] doc = ["reno", "sphinx", "tornado (>=4.5)"] +[[package]] +name = "tiktoken" +version = "0.6.0" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +optional = true +python-versions = ">=3.8" +files = [ + {file = "tiktoken-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:277de84ccd8fa12730a6b4067456e5cf72fef6300bea61d506c09e45658d41ac"}, + {file = "tiktoken-0.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c44433f658064463650d61387623735641dcc4b6c999ca30bc0f8ba3fccaf5c"}, + {file = "tiktoken-0.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afb9a2a866ae6eef1995ab656744287a5ac95acc7e0491c33fad54d053288ad3"}, + {file = "tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c62c05b3109fefca26fedb2820452a050074ad8e5ad9803f4652977778177d9f"}, + {file = "tiktoken-0.6.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0ef917fad0bccda07bfbad835525bbed5f3ab97a8a3e66526e48cdc3e7beacf7"}, + {file = "tiktoken-0.6.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:e095131ab6092d0769a2fda85aa260c7c383072daec599ba9d8b149d2a3f4d8b"}, + {file = "tiktoken-0.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:05b344c61779f815038292a19a0c6eb7098b63c8f865ff205abb9ea1b656030e"}, + {file = "tiktoken-0.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cefb9870fb55dca9e450e54dbf61f904aab9180ff6fe568b61f4db9564e78871"}, + {file = "tiktoken-0.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:702950d33d8cabc039845674107d2e6dcabbbb0990ef350f640661368df481bb"}, + {file = "tiktoken-0.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8d49d076058f23254f2aff9af603863c5c5f9ab095bc896bceed04f8f0b013a"}, + {file = "tiktoken-0.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:430bc4e650a2d23a789dc2cdca3b9e5e7eb3cd3935168d97d43518cbb1f9a911"}, + {file = "tiktoken-0.6.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:293cb8669757301a3019a12d6770bd55bec38a4d3ee9978ddbe599d68976aca7"}, + {file = "tiktoken-0.6.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7bd1a288b7903aadc054b0e16ea78e3171f70b670e7372432298c686ebf9dd47"}, + {file = "tiktoken-0.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac76e000183e3b749634968a45c7169b351e99936ef46f0d2353cd0d46c3118d"}, + {file = "tiktoken-0.6.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:17cc8a4a3245ab7d935c83a2db6bb71619099d7284b884f4b2aea4c74f2f83e3"}, + {file = "tiktoken-0.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:284aebcccffe1bba0d6571651317df6a5b376ff6cfed5aeb800c55df44c78177"}, + {file = "tiktoken-0.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c1a3a5d33846f8cd9dd3b7897c1d45722f48625a587f8e6f3d3e85080559be8"}, + {file = "tiktoken-0.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6318b2bb2337f38ee954fd5efa82632c6e5ced1d52a671370fa4b2eff1355e91"}, + {file = "tiktoken-0.6.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:1f5f0f2ed67ba16373f9a6013b68da298096b27cd4e1cf276d2d3868b5c7efd1"}, + {file = "tiktoken-0.6.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:75af4c0b16609c2ad02581f3cdcd1fb698c7565091370bf6c0cf8624ffaba6dc"}, + {file = "tiktoken-0.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:45577faf9a9d383b8fd683e313cf6df88b6076c034f0a16da243bb1c139340c3"}, + {file = "tiktoken-0.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7c1492ab90c21ca4d11cef3a236ee31a3e279bb21b3fc5b0e2210588c4209e68"}, + {file = "tiktoken-0.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e2b380c5b7751272015400b26144a2bab4066ebb8daae9c3cd2a92c3b508fe5a"}, + {file = "tiktoken-0.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f497598b9f58c99cbc0eb764b4a92272c14d5203fc713dd650b896a03a50ad"}, + {file = "tiktoken-0.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e65e8bd6f3f279d80f1e1fbd5f588f036b9a5fa27690b7f0cc07021f1dfa0839"}, + {file = "tiktoken-0.6.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5f1495450a54e564d236769d25bfefbf77727e232d7a8a378f97acddee08c1ae"}, + {file = "tiktoken-0.6.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6c4e4857d99f6fb4670e928250835b21b68c59250520a1941618b5b4194e20c3"}, + {file = "tiktoken-0.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:168d718f07a39b013032741867e789971346df8e89983fe3c0ef3fbd5a0b1cb9"}, + {file = "tiktoken-0.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:47fdcfe11bd55376785a6aea8ad1db967db7f66ea81aed5c43fad497521819a4"}, + {file = "tiktoken-0.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fb7d2ccbf1a7784810aff6b80b4012fb42c6fc37eaa68cb3b553801a5cc2d1fc"}, + {file = "tiktoken-0.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ccb7a111ee76af5d876a729a347f8747d5ad548e1487eeea90eaf58894b3138"}, + {file = "tiktoken-0.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b2048e1086b48e3c8c6e2ceeac866561374cd57a84622fa49a6b245ffecb7744"}, + {file = "tiktoken-0.6.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:07f229a5eb250b6403a61200199cecf0aac4aa23c3ecc1c11c1ca002cbb8f159"}, + {file = "tiktoken-0.6.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:432aa3be8436177b0db5a2b3e7cc28fd6c693f783b2f8722539ba16a867d0c6a"}, + {file = "tiktoken-0.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:8bfe8a19c8b5c40d121ee7938cd9c6a278e5b97dc035fd61714b4f0399d2f7a1"}, + {file = "tiktoken-0.6.0.tar.gz", hash = "sha256:ace62a4ede83c75b0374a2ddfa4b76903cf483e9cb06247f566be3bf14e6beed"}, +] + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + [[package]] name = "tokenizers" version = "0.15.2" @@ -6317,7 +6832,7 @@ optree = ["optree (>=0.9.1)"] name = "tornado" version = "6.4" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -optional = true +optional = false python-versions = ">= 3.8" files = [ {file = "tornado-6.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0"}, @@ -6357,7 +6872,7 @@ telegram = ["requests"] name = "traitlets" version = "5.14.3" description = "Traitlets Python configuration system" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, @@ -6487,6 +7002,21 @@ files = [ {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." 
+optional = true +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "tzdata" version = "2024.1" @@ -6835,7 +7365,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = true +optional = false python-versions = "*" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, diff --git a/pyproject.toml b/pyproject.toml index 7557fdce05..341601aeb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,6 +120,7 @@ rich = "^13.7.1" psycopg2 = { version = "^2.9.9", optional = true } pgvector = { version = "^0.2.5", optional = true } structlog = "^24.1.0" +llama-index = {version = "^0.10.30", optional = true} snowflake-snowpark-python = { version = "*",optional=true, python = ">=3.9,<3.12" } jinja2 = "^3.1.3" @@ -133,6 +134,7 @@ pytest-mock = "^3.12.0" ruff = "^0.3.0" black = "^24.2.0" pre-commit = "^3.7.0" +ipykernel = "^6.29.4" [tool.poetry.extras] chromadb = ["chromadb"] diff --git a/tests/retrieve/test_llama_index_rm.py b/tests/retrieve/test_llama_index_rm.py new file mode 100644 index 0000000000..35711087e9 --- /dev/null +++ b/tests/retrieve/test_llama_index_rm.py @@ -0,0 +1,61 @@ +import logging + +import pytest + +import dspy +from dsp.modules.dummy_lm import DummyLM +from dspy.datasets import HotPotQA + +try: + from llama_index.core import Settings, VectorStoreIndex + from llama_index.core.base.base_retriever import BaseRetriever + from llama_index.core.embeddings.mock_embed_model import MockEmbedding + from 
llama_index.core.readers.string_iterable import StringIterableReader + + from dspy.retrieve.llama_index_rm import LlamaIndexRM + +except ImportError: + logging.info("Optional dependency llama-index is not installed - skipping LlamaIndexRM tests.") + + +@pytest.fixture() +def rag_setup() -> dict: + """Builds the necessary fixtures to test LI""" + pytest.importorskip("llamaindex") + dataset = HotPotQA(train_seed=1, train_size=8, eval_seed=2023, dev_size=4, test_size=0) + trainset = [x.with_inputs("question") for x in dataset.train] + devset = [x.with_inputs("question") for x in dataset.dev] + ragset = [f"Question: {x.question} Answer: {x.answer}" for x in dataset.train] + dummyset = {x.question: x.answer for x in dataset.train} + + Settings.embed_model = MockEmbedding(8) + docs = StringIterableReader().load_data(texts=ragset) + index = VectorStoreIndex.from_documents(documents=docs) + retriever = index.as_retriever() + rm = LlamaIndexRM(retriever) + + return { + "index": index, + "retriever": retriever, + "rm": rm, + "lm": DummyLM(answers=dummyset), + "trainset": trainset, + "devset": devset, + } + + +def test_lirm_as_rm(rag_setup): + """Test the retriever as retriever method""" + pytest.importorskip("llamaindex") + retriever = rag_setup.get("retriever") + test_res_li = retriever.retrieve("At My Window was released by which American singer-songwriter?") + rm = rag_setup.get("rm") + test_res_dspy = rm.forward("At My Window was released by which American singer-songwriter?") + + assert isinstance(retriever, BaseRetriever), "Ensuring that the retriever is a LI Retriever object" + assert isinstance(test_res_li, list), "Ensuring results are a list from LI Retriever" + + assert isinstance(rm, dspy.Retrieve), "Ensuring the RM is a retriever object from dspy" + assert isinstance(test_res_dspy, list), "Ensuring the results are a list from the DSPy retriever" + + assert len(test_res_li) == len(test_res_dspy), "Rough equality check of the results"