From 16c464dfa2aaaa2864158321e3969288a4dedc43 Mon Sep 17 00:00:00 2001 From: Patrick Miramontes Date: Mon, 22 Apr 2024 19:52:02 +0000 Subject: [PATCH 1/7] feat(llama_index_rm): Llama Index retreivers compatibility. --- dspy/retrieve/llama_index_rm.py | 63 ++++ poetry.lock | 602 ++++++++++++++++++++++++++++++-- pyproject.toml | 2 + 3 files changed, 632 insertions(+), 35 deletions(-) create mode 100644 dspy/retrieve/llama_index_rm.py diff --git a/dspy/retrieve/llama_index_rm.py b/dspy/retrieve/llama_index_rm.py new file mode 100644 index 0000000000..0b861bdf40 --- /dev/null +++ b/dspy/retrieve/llama_index_rm.py @@ -0,0 +1,63 @@ +import dspy + +try: + from llama_index.core.base.base_retriever import BaseRetriever +except ImportError: + err = "The 'llama_index' package is required to use LlamaIndexRM. Install it with 'pip install llama_index'." + raise ImportError(err) from None + + +class LlamaIndexRM(dspy.Retrieve): + """Implements a retriever which wraps over a LlamaIndex retriever. + + This is done to bridge LlamaIndex and DSPy and allow the various retrieval + abstractions in LlamaIndex to be used in DSPy. 
+ + To-do (maybe): + - Async support (DSPy lacks this entirely it seems, so not a priority until the rest of the repo catches on) + - Text/video retrieval (Available in LI, not sure if this will be a priority in DSPy) + + Args: + retriever (BaseRetriever): A LlamaIndex retriever object - text based only + + Returns: + DSPy RM Object - this is a retriever object that can be used in DSPy + """ + + retriever: BaseRetriever + + def __init__(self, retriever: BaseRetriever): + self.retriever = retriever + + @property + def similarity_top_k(self) -> int: + """Return similarity top k of retriever.""" + return self.retriever.similarity_top_k + + @similarity_top_k.setter + def similarity_top_k(self, k: int) -> None: + """Set similarity top k of retriever.""" + self.retriever.similarity_top_k = k + + def forward(self, query: str) -> list[dspy.Example]: + """Forward function for the LI retriever. + + This is the function that is called to retrieve the top k examples for a given query. + + Top k is set via the setter similarity_top_k or at LI instantiation. 
+ + Args: + query (str): The query to retrieve examples for + + Returns: + List[dspy.Example]: A list of examples retrieved by the retriever + """ + raw = self.retriever.retrieve(query) + + return [ + dspy.Example( + text=result.text, + score=result.score, + ) + for result in raw + ] diff --git a/poetry.lock b/poetry.lock index 1d68a9da77..3cd0daec9e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -201,7 +201,7 @@ trio = ["trio (>=0.23)"] name = "appnope" version = "0.1.4" description = "Disable App Nap on macOS >= 10.9" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"}, @@ -269,7 +269,7 @@ tests = ["pytest"] name = "asttokens" version = "2.4.1" description = "Annotate AST trees with source code positions" -optional = true +optional = false python-versions = "*" files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, @@ -467,7 +467,7 @@ typecheck = ["mypy"] name = "beautifulsoup4" version = "4.12.3" description = "Screen-scraping library" -optional = true +optional = false python-versions = ">=3.6.0" files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, @@ -597,7 +597,7 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, @@ -904,7 +904,7 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"] name = "comm" version = "0.2.2" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, @@ -971,6 +971,21 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "dataclasses-json" +version = "0.6.4" +description = "Easily serialize dataclasses to and from JSON." +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "dataclasses_json-0.6.4-py3-none-any.whl", hash = "sha256:f90578b8a3177f7552f4e1a6e535e84293cd5da421fcce0642d49c0d7bdf8df2"}, + {file = "dataclasses_json-0.6.4.tar.gz", hash = "sha256:73696ebf24936560cca79a2430cbc4f3dd23ac7bf46ed17f38e5e5e7657a6377"}, +] + +[package.dependencies] +marshmallow = ">=3.18.0,<4.0.0" +typing-inspect = ">=0.4.0,<1" + [[package]] name = "datasets" version = "2.18.0" @@ -1019,7 +1034,7 @@ vision = ["Pillow (>=6.2.1)"] name = "debugpy" version = "1.8.1" description = "An implementation of the Debug Adapter Protocol for Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "debugpy-1.8.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741"}, @@ -1050,7 +1065,7 @@ files = [ name = "decorator" version = "5.1.1" description = "Decorators for Humans" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, @@ -1061,7 +1076,7 @@ files = [ name = "deprecated" version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
-optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, @@ -1089,6 +1104,17 @@ files = [ graph = ["objgraph (>=1.7.2)"] profile = ["gprof2dot (>=2022.7.29)"] +[[package]] +name = "dirtyjson" +version = "1.0.8" +description = "JSON decoder for Python that can extract data from the muck" +optional = false +python-versions = "*" +files = [ + {file = "dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"}, + {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"}, +] + [[package]] name = "distlib" version = "0.3.8" @@ -1181,7 +1207,7 @@ test = ["pytest (>=6)"] name = "executing" version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, @@ -1998,7 +2024,7 @@ files = [ name = "ipykernel" version = "6.29.4" description = "IPython Kernel for Jupyter" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "ipykernel-6.29.4-py3-none-any.whl", hash = "sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da"}, @@ -2031,7 +2057,7 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio name = "ipython" version = "8.18.1" description = "IPython: Productive Interactive Computing" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, @@ -2082,7 +2108,7 @@ six = "*" name = "jedi" version = "0.19.1" description = "An autocompletion 
tool for Python that can be used for text editors." -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, @@ -2202,7 +2228,7 @@ testing = ["coverage", "ipykernel", "jupytext", "matplotlib", "nbdime", "nbforma name = "jupyter-client" version = "8.6.1" description = "Jupyter protocol implementation and client libraries" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jupyter_client-8.6.1-py3-none-any.whl", hash = "sha256:3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f"}, @@ -2225,7 +2251,7 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt name = "jupyter-core" version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, @@ -2267,6 +2293,300 @@ websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" [package.extras] adal = ["adal (>=1.0.2)"] +[[package]] +name = "llama-index" +version = "0.10.30" +description = "Interface between LLMs and your data" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index-0.10.30-py3-none-any.whl", hash = "sha256:7083014be8f74c6037cc33d629985a7505ee3c89b972503f8dd7d4ac7c45a4f4"}, + {file = "llama_index-0.10.30.tar.gz", hash = "sha256:58fab1047c75e39803e8c394fab7d5e81ce97f27281bf7cba6bea40a7328562e"}, +] + +[package.dependencies] +llama-index-agent-openai = ">=0.1.4,<0.3.0" +llama-index-cli = ">=0.1.2,<0.2.0" +llama-index-core = ">=0.10.30,<0.11.0" +llama-index-embeddings-openai = ">=0.1.5,<0.2.0" +llama-index-indices-managed-llama-cloud = ">=0.1.2,<0.2.0" +llama-index-legacy = ">=0.9.48,<0.10.0" +llama-index-llms-openai = 
">=0.1.13,<0.2.0" +llama-index-multi-modal-llms-openai = ">=0.1.3,<0.2.0" +llama-index-program-openai = ">=0.1.3,<0.2.0" +llama-index-question-gen-openai = ">=0.1.2,<0.2.0" +llama-index-readers-file = ">=0.1.4,<0.2.0" +llama-index-readers-llama-parse = ">=0.1.2,<0.2.0" + +[[package]] +name = "llama-index-agent-openai" +version = "0.2.2" +description = "llama-index agent openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_agent_openai-0.2.2-py3-none-any.whl", hash = "sha256:fa8cbc2c7be5a465848f8d5b432db01c55f07dfa06357edb7fb77fb17d534d1e"}, + {file = "llama_index_agent_openai-0.2.2.tar.gz", hash = "sha256:12063dd932c74015796f973986cc52d783f51fda38e4ead72a56d0fd195925ee"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.5,<0.2.0" +openai = ">=1.14.0" + +[[package]] +name = "llama-index-cli" +version = "0.1.12" +description = "llama-index cli" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_cli-0.1.12-py3-none-any.whl", hash = "sha256:d80d546786f02d3f16f6183b8e86b22b8b5c33a1500923659f2ccbff8d5df634"}, + {file = "llama_index_cli-0.1.12.tar.gz", hash = "sha256:3cf1f706c3c69c6b1aab07fca7faad3959db1709808efd50491b669d38b0b580"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.11.post1,<0.11.0" +llama-index-embeddings-openai = ">=0.1.1,<0.2.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-core" +version = "0.10.30" +description = "Interface between LLMs and your data" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_core-0.10.30-py3-none-any.whl", hash = "sha256:2f291ce2975f9dbf0ea87d684d3d8122ce216265f468f32baa2cf4ecfb34ed2a"}, + {file = "llama_index_core-0.10.30.tar.gz", hash = "sha256:bed3f683606a0b0eb0839677c935a4b57b7bae509a95d380e51c6225630660e0"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = 
">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" +fsspec = ">=2023.5.0" +httpx = "*" +llamaindex-py-client = ">=0.1.18,<0.2.0" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">=3.8.1,<4.0.0" +numpy = "*" +openai = ">=1.1.0" +pandas = "*" +pillow = ">=9.0.0" +PyYAML = ">=6.0.1" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<9.0.0" +tiktoken = ">=0.3.3" +tqdm = ">=4.66.1,<5.0.0" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" +wrapt = "*" + +[package.extras] +gradientai = ["gradientai (>=1.4.0)"] +html = ["beautifulsoup4 (>=4.12.2,<5.0.0)"] +langchain = ["langchain (>=0.0.303)"] +local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.33.1,<5.0.0)"] +postgres = ["asyncpg (>=0.29.0,<0.30.0)", "pgvector (>=0.2.4,<0.3.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] + +[[package]] +name = "llama-index-embeddings-openai" +version = "0.1.8" +description = "llama-index embeddings openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_embeddings_openai-0.1.8-py3-none-any.whl", hash = "sha256:186be52c29a8c82a39ff6e033e3a61b9c5a17cf6177a5507abe4f5c79fea3a53"}, + {file = "llama_index_embeddings_openai-0.1.8.tar.gz", hash = "sha256:aae4ce3ec2fe2cb604d944646982c71663a8b99ed5f8c370823ee82eef4ddd3b"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" + +[[package]] +name = "llama-index-indices-managed-llama-cloud" +version = "0.1.5" +description = "llama-index indices llama-cloud integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_indices_managed_llama_cloud-0.1.5-py3-none-any.whl", hash = 
"sha256:79f636cb6f4fabb12fec153564110f7f4dfda3cacb087793a5fec988484d7d2c"}, + {file = "llama_index_indices_managed_llama_cloud-0.1.5.tar.gz", hash = "sha256:47cdde9f06bbddd508f0efcf41de425e85171ac2c8fda8a5fb2a89673e1c8c71"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.0,<0.11.0" +llamaindex-py-client = ">=0.1.13,<0.2.0" + +[[package]] +name = "llama-index-legacy" +version = "0.9.48" +description = "Interface between LLMs and your data" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_legacy-0.9.48-py3-none-any.whl", hash = "sha256:714ada95beac179b4acefa4d2deff74bb7b2f22b0f699ac247d4cb67738d16d4"}, + {file = "llama_index_legacy-0.9.48.tar.gz", hash = "sha256:82ddc4691edbf49533d65582c249ba22c03fe96fbd3e92f7758dccef28e43834"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = ">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" +fsspec = ">=2023.5.0" +httpx = "*" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">=3.8.1,<4.0.0" +numpy = "*" +openai = ">=1.1.0" +pandas = "*" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<9.0.0" +tiktoken = ">=0.3.3" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" + +[package.extras] +gradientai = ["gradientai (>=1.4.0)"] +html = ["beautifulsoup4 (>=4.12.2,<5.0.0)"] +langchain = ["langchain (>=0.0.303)"] +local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.33.1,<5.0.0)"] +postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] + +[[package]] +name = "llama-index-llms-openai" +version = "0.1.16" +description = "llama-index llms openai integration" +optional = false 
+python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_llms_openai-0.1.16-py3-none-any.whl", hash = "sha256:4a9c0fe969302731907c8fa31631812397637e114a44ebbad11fd6c59def9315"}, + {file = "llama_index_llms_openai-0.1.16.tar.gz", hash = "sha256:313bbc17c241992430a6bf686a1b1edc4276c8256ad6b0550aa1bea1e0fed1a6"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.24,<0.11.0" + +[[package]] +name = "llama-index-multi-modal-llms-openai" +version = "0.1.5" +description = "llama-index multi-modal-llms openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_multi_modal_llms_openai-0.1.5-py3-none-any.whl", hash = "sha256:bb332580e7e4b5f2f87488b3649d2ceb53ee82c848e59694578a982c3982ce0b"}, + {file = "llama_index_multi_modal_llms_openai-0.1.5.tar.gz", hash = "sha256:9a237f4f886d1e20c27e9493e80b3e1f8753859481ff1b58fe25b7aa39b198a2"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-program-openai" +version = "0.1.5" +description = "llama-index program openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_program_openai-0.1.5-py3-none-any.whl", hash = "sha256:20b6efa706ac73e4dc5086900fea1ffcb1eb0787c8a6f081669d37da7235aee0"}, + {file = "llama_index_program_openai-0.1.5.tar.gz", hash = "sha256:c33aa2d2876ad0ff1f9a2a755d4e7d4917240847d0174e7b2d0b8474499bb700"}, +] + +[package.dependencies] +llama-index-agent-openai = ">=0.1.1,<0.3.0" +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-question-gen-openai" +version = "0.1.3" +description = "llama-index question_gen openai integration" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "llama_index_question_gen_openai-0.1.3-py3-none-any.whl", hash = "sha256:1f83b49e8b2e665030d1ec8c54687d6985d9fa8426147b64e46628a9e489b302"}, + {file 
= "llama_index_question_gen_openai-0.1.3.tar.gz", hash = "sha256:4486198117a45457d2e036ae60b93af58052893cc7d78fa9b6f47dd47b81e2e1"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.1,<0.11.0" +llama-index-llms-openai = ">=0.1.1,<0.2.0" +llama-index-program-openai = ">=0.1.1,<0.2.0" + +[[package]] +name = "llama-index-readers-file" +version = "0.1.19" +description = "llama-index readers file integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_file-0.1.19-py3-none-any.whl", hash = "sha256:25eb6d066dc38753de435e876ef8511c68d84102302c053b7dcb0776db68fced"}, + {file = "llama_index_readers_file-0.1.19.tar.gz", hash = "sha256:194c1b9b85c265159b7302c7d80adba937aab06f05c170af7fd95c4e7a8aec35"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.12.3,<5.0.0" +llama-index-core = ">=0.10.1,<0.11.0" +pypdf = ">=4.0.1,<5.0.0" +striprtf = ">=0.0.26,<0.0.27" + +[package.extras] +pymupdf = ["pymupdf (>=1.23.21,<2.0.0)"] + +[[package]] +name = "llama-index-readers-llama-parse" +version = "0.1.4" +description = "llama-index readers llama-parse integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_llama_parse-0.1.4-py3-none-any.whl", hash = "sha256:c4914b37d12cceee56fbd185cca80f87d60acbf8ea7a73f9719610180be1fcdd"}, + {file = "llama_index_readers_llama_parse-0.1.4.tar.gz", hash = "sha256:78608b193c818894aefeee0aa303f02b7f80f2e4caf13866c2fd3b0b1023e2c0"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.7,<0.11.0" +llama-parse = ">=0.4.0,<0.5.0" + +[[package]] +name = "llama-parse" +version = "0.4.1" +description = "Parse files into RAG-Optimized formats." 
+optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_parse-0.4.1-py3-none-any.whl", hash = "sha256:2c08962b66791c61fc360ae2042f953729c7b8decc3590d01fea5a98ca1f6676"}, + {file = "llama_parse-0.4.1.tar.gz", hash = "sha256:d723af84d6a1fc99eb431915d21865d20b76d8a246dbaa124d1f96c956a644f7"}, +] + +[package.dependencies] +llama-index-core = ">=0.10.29" + +[[package]] +name = "llamaindex-py-client" +version = "0.1.18" +description = "" +optional = false +python-versions = "<4,>=3.8" +files = [ + {file = "llamaindex_py_client-0.1.18-py3-none-any.whl", hash = "sha256:5417e41666504a77ecf5bdd9b403ffff1d714880ee30d49e234fb7686177eeeb"}, + {file = "llamaindex_py_client-0.1.18.tar.gz", hash = "sha256:091ee49a92592e3894777ade12516c2137093f9d6441a549f406461917ce9b7e"}, +] + +[package.dependencies] +httpx = ">=0.20.0" +pydantic = ">=1.10" + [[package]] name = "loguru" version = "0.7.2" @@ -2450,7 +2770,7 @@ urllib3 = "*" name = "marshmallow" version = "3.21.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "marshmallow-3.21.1-py3-none-any.whl", hash = "sha256:f085493f79efb0644f270a9bf2892843142d80d7174bbbd2f3713f2a589dc633"}, @@ -2469,7 +2789,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "matplotlib-inline" version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, @@ -2981,7 +3301,7 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] name = "nest-asyncio" version = "1.6.0" description = "Patch asyncio to allow nested event loops" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, @@ -3006,6 +3326,31 @@ doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9. 
extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, + {file = "nltk-3.8.1.zip", hash = "sha256:1834da3d0682cba4f2cede2f9aad6b0fafb6461ba451db0efb6f9c39798d64d3"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + [[package]] name = "nodeenv" version = "1.8.0" @@ -3480,6 +3825,7 @@ optional = false python-versions = ">=3.9" files = [ {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, @@ -3500,6 +3846,7 @@ files = [ {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, {file = 
"pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, @@ -3546,7 +3893,7 @@ xml = ["lxml (>=4.9.2)"] name = "parso" version = "0.8.4" description = "A Python Parser" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, @@ -3572,7 +3919,7 @@ files = [ name = "pexpect" version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." 
-optional = true +optional = false python-versions = "*" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, @@ -3595,6 +3942,92 @@ files = [ [package.dependencies] numpy = "*" +[[package]] +name = "pillow" +version = "10.3.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"}, + {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475"}, + {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf"}, + {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3"}, + {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5"}, + {file = "pillow-10.3.0-cp310-cp310-win32.whl", hash = "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2"}, + {file = "pillow-10.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f"}, + {file = "pillow-10.3.0-cp310-cp310-win_arm64.whl", hash = 
"sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b"}, + {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"}, + {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451"}, + {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd"}, + {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad"}, + {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c"}, + {file = "pillow-10.3.0-cp311-cp311-win32.whl", hash = "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09"}, + {file = "pillow-10.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d"}, + {file = "pillow-10.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f"}, + {file = "pillow-10.3.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84"}, + {file = "pillow-10.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19"}, + {file 
= "pillow-10.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338"}, + {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1"}, + {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462"}, + {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a"}, + {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef"}, + {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3"}, + {file = "pillow-10.3.0-cp312-cp312-win32.whl", hash = "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d"}, + {file = "pillow-10.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b"}, + {file = "pillow-10.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a"}, + {file = "pillow-10.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b"}, + {file = "pillow-10.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2"}, + {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa"}, + {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383"}, + {file = 
"pillow-10.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d"}, + {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd"}, + {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d"}, + {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3"}, + {file = "pillow-10.3.0-cp38-cp38-win32.whl", hash = "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b"}, + {file = "pillow-10.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999"}, + {file = "pillow-10.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936"}, + {file = "pillow-10.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57"}, + {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8"}, + {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9"}, + {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb"}, + {file = "pillow-10.3.0-cp39-cp39-win32.whl", hash = "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572"}, + {file = "pillow-10.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb"}, + {file = "pillow-10.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3"}, + {file = "pillow-10.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a"}, + {file = "pillow-10.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591"}, + {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "pinecone-client" version = "2.2.4" @@ -3714,7 +4147,7 @@ virtualenv = ">=20.10.0" name = "prompt-toolkit" version = "3.0.43" description = "Library for building powerful interactive command lines in Python" -optional = true +optional = false python-versions = ">=3.7.0" files = [ {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"}, @@ -3748,7 +4181,7 @@ files = [ name = "psutil" version = "5.9.8" description = "Cross-platform lib for process and system monitoring in Python." 
-optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, @@ -3798,7 +4231,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -optional = true +optional = false python-versions = "*" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -3856,7 +4289,7 @@ functions = ["apache-bookkeeper-client (>=4.16.1)", "grpcio (>=1.60.0)", "promet name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" -optional = true +optional = false python-versions = "*" files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, @@ -3965,7 +4398,7 @@ pyasn1 = ">=0.4.6,<0.7.0" name = "pycparser" version = "2.22" description = "C parser in Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, @@ -4215,6 +4648,27 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pypdf" +version = "4.2.0" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pypdf-4.2.0-py3-none-any.whl", hash = "sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1"}, + {file = "pypdf-4.2.0.tar.gz", hash = "sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1"}, +] + +[package.dependencies] +typing_extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + +[package.extras] +crypto = 
["PyCryptodome", "cryptography"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow (>=8.0.0)", "PyCryptodome", "cryptography"] +image = ["Pillow (>=8.0.0)"] + [[package]] name = "pypika" version = "0.48.9" @@ -4320,7 +4774,7 @@ files = [ name = "pywin32" version = "306" description = "Python for Window Extensions" -optional = true +optional = false python-versions = "*" files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, @@ -4417,7 +4871,7 @@ pyyaml = "*" name = "pyzmq" version = "26.0.0" description = "Python bindings for 0MQ" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "pyzmq-26.0.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:a86409f3f8eae7af5a47babd831a119bdf552e831f04d2225a313305e8e35e7c"}, @@ -5061,7 +5515,7 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, @@ -5360,7 +5814,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", optional = true, markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\" or extra == \"asyncio\""} typing-extensions = ">=4.6.0" [package.extras] @@ -5392,7 +5846,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -optional = true +optional = false python-versions = "*" files = [ {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, @@ -5425,6 +5879,17 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\"" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +[[package]] +name = "striprtf" +version = "0.0.26" +description = "A simple library to convert rtf to text" +optional = false +python-versions = "*" +files = [ + {file = "striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb"}, + {file = "striprtf-0.0.26.tar.gz", hash = "sha256:fdb2bba7ac440072d1c41eab50d8d74ae88f60a8b6575c6e2c7805dc462093aa"}, +] + [[package]] name = "structlog" version = "24.1.0" @@ -5474,7 +5939,7 @@ widechars = 
["wcwidth"] name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, @@ -5484,6 +5949,58 @@ files = [ [package.extras] doc = ["reno", "sphinx", "tornado (>=4.5)"] +[[package]] +name = "tiktoken" +version = "0.6.0" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tiktoken-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:277de84ccd8fa12730a6b4067456e5cf72fef6300bea61d506c09e45658d41ac"}, + {file = "tiktoken-0.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c44433f658064463650d61387623735641dcc4b6c999ca30bc0f8ba3fccaf5c"}, + {file = "tiktoken-0.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afb9a2a866ae6eef1995ab656744287a5ac95acc7e0491c33fad54d053288ad3"}, + {file = "tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c62c05b3109fefca26fedb2820452a050074ad8e5ad9803f4652977778177d9f"}, + {file = "tiktoken-0.6.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0ef917fad0bccda07bfbad835525bbed5f3ab97a8a3e66526e48cdc3e7beacf7"}, + {file = "tiktoken-0.6.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e095131ab6092d0769a2fda85aa260c7c383072daec599ba9d8b149d2a3f4d8b"}, + {file = "tiktoken-0.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:05b344c61779f815038292a19a0c6eb7098b63c8f865ff205abb9ea1b656030e"}, + {file = "tiktoken-0.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cefb9870fb55dca9e450e54dbf61f904aab9180ff6fe568b61f4db9564e78871"}, + {file = "tiktoken-0.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:702950d33d8cabc039845674107d2e6dcabbbb0990ef350f640661368df481bb"}, + {file = 
"tiktoken-0.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8d49d076058f23254f2aff9af603863c5c5f9ab095bc896bceed04f8f0b013a"}, + {file = "tiktoken-0.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:430bc4e650a2d23a789dc2cdca3b9e5e7eb3cd3935168d97d43518cbb1f9a911"}, + {file = "tiktoken-0.6.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:293cb8669757301a3019a12d6770bd55bec38a4d3ee9978ddbe599d68976aca7"}, + {file = "tiktoken-0.6.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7bd1a288b7903aadc054b0e16ea78e3171f70b670e7372432298c686ebf9dd47"}, + {file = "tiktoken-0.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac76e000183e3b749634968a45c7169b351e99936ef46f0d2353cd0d46c3118d"}, + {file = "tiktoken-0.6.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:17cc8a4a3245ab7d935c83a2db6bb71619099d7284b884f4b2aea4c74f2f83e3"}, + {file = "tiktoken-0.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:284aebcccffe1bba0d6571651317df6a5b376ff6cfed5aeb800c55df44c78177"}, + {file = "tiktoken-0.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c1a3a5d33846f8cd9dd3b7897c1d45722f48625a587f8e6f3d3e85080559be8"}, + {file = "tiktoken-0.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6318b2bb2337f38ee954fd5efa82632c6e5ced1d52a671370fa4b2eff1355e91"}, + {file = "tiktoken-0.6.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f5f0f2ed67ba16373f9a6013b68da298096b27cd4e1cf276d2d3868b5c7efd1"}, + {file = "tiktoken-0.6.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:75af4c0b16609c2ad02581f3cdcd1fb698c7565091370bf6c0cf8624ffaba6dc"}, + {file = "tiktoken-0.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:45577faf9a9d383b8fd683e313cf6df88b6076c034f0a16da243bb1c139340c3"}, + {file = "tiktoken-0.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7c1492ab90c21ca4d11cef3a236ee31a3e279bb21b3fc5b0e2210588c4209e68"}, + {file = 
"tiktoken-0.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e2b380c5b7751272015400b26144a2bab4066ebb8daae9c3cd2a92c3b508fe5a"}, + {file = "tiktoken-0.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f497598b9f58c99cbc0eb764b4a92272c14d5203fc713dd650b896a03a50ad"}, + {file = "tiktoken-0.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e65e8bd6f3f279d80f1e1fbd5f588f036b9a5fa27690b7f0cc07021f1dfa0839"}, + {file = "tiktoken-0.6.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5f1495450a54e564d236769d25bfefbf77727e232d7a8a378f97acddee08c1ae"}, + {file = "tiktoken-0.6.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6c4e4857d99f6fb4670e928250835b21b68c59250520a1941618b5b4194e20c3"}, + {file = "tiktoken-0.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:168d718f07a39b013032741867e789971346df8e89983fe3c0ef3fbd5a0b1cb9"}, + {file = "tiktoken-0.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:47fdcfe11bd55376785a6aea8ad1db967db7f66ea81aed5c43fad497521819a4"}, + {file = "tiktoken-0.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fb7d2ccbf1a7784810aff6b80b4012fb42c6fc37eaa68cb3b553801a5cc2d1fc"}, + {file = "tiktoken-0.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ccb7a111ee76af5d876a729a347f8747d5ad548e1487eeea90eaf58894b3138"}, + {file = "tiktoken-0.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2048e1086b48e3c8c6e2ceeac866561374cd57a84622fa49a6b245ffecb7744"}, + {file = "tiktoken-0.6.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:07f229a5eb250b6403a61200199cecf0aac4aa23c3ecc1c11c1ca002cbb8f159"}, + {file = "tiktoken-0.6.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:432aa3be8436177b0db5a2b3e7cc28fd6c693f783b2f8722539ba16a867d0c6a"}, + {file = "tiktoken-0.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:8bfe8a19c8b5c40d121ee7938cd9c6a278e5b97dc035fd61714b4f0399d2f7a1"}, + {file = "tiktoken-0.6.0.tar.gz", hash = 
"sha256:ace62a4ede83c75b0374a2ddfa4b76903cf483e9cb06247f566be3bf14e6beed"}, +] + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + [[package]] name = "tokenizers" version = "0.15.2" @@ -5695,7 +6212,7 @@ optree = ["optree (>=0.9.1)"] name = "tornado" version = "6.4" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -optional = true +optional = false python-versions = ">= 3.8" files = [ {file = "tornado-6.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0"}, @@ -5735,7 +6252,7 @@ telegram = ["requests"] name = "traitlets" version = "5.14.2" description = "Traitlets Python configuration system" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "traitlets-5.14.2-py3-none-any.whl", hash = "sha256:fcdf85684a772ddeba87db2f398ce00b40ff550d1528c03c14dbf6a02003cd80"}, @@ -5865,6 +6382,21 @@ files = [ {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] +[[package]] +name = "typing-inspect" +version = "0.9.0" +description = "Runtime inspection utilities for typing module." 
+optional = false +python-versions = "*" +files = [ + {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, + {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, +] + +[package.dependencies] +mypy-extensions = ">=0.3.0" +typing-extensions = ">=3.7.4" + [[package]] name = "tzdata" version = "2024.1" @@ -6241,7 +6773,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = true +optional = false python-versions = "*" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, @@ -6384,7 +6916,7 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] name = "wrapt" version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, @@ -6708,4 +7240,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "8fbc1ecc09a8f8aaf59e47f2d5eb21d98aca0d30c794de0b4ec567d3b7b4a0f0" +content-hash = "343c8d01b3efc03a1f4ed7b4eccee2cfe5750047a62502a23efd870227ccc74e" diff --git a/pyproject.toml b/pyproject.toml index 9287ca8cdf..2c33c8a090 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,6 +118,7 @@ rich = "^13.7.1" psycopg2 = { version = "^2.9.9", optional = true } pgvector = { version = "^0.2.5", optional = true } structlog = "^24.1.0" +llama-index = "^0.10.30" [tool.poetry.group.dev.dependencies] @@ -128,6 +129,7 @@ pytest-mock = "^3.12.0" ruff = "^0.3.0" black = "^24.2.0" pre-commit = "^3.7.0" +ipykernel = "^6.29.4" [tool.poetry.extras] chromadb = ["chromadb"] From 
1bebd460c60de1021c2eec7c4cac59acc981270f Mon Sep 17 00:00:00 2001 From: Patrick Miramontes Date: Fri, 26 Apr 2024 16:01:04 +0000 Subject: [PATCH 2/7] feat(llamaindex): added llamaindex rm --- tests/retrieve/test_llama_index_rm.py | 30 --------------------------- 1 file changed, 30 deletions(-) diff --git a/tests/retrieve/test_llama_index_rm.py b/tests/retrieve/test_llama_index_rm.py index ada5b7cf93..23f6e37245 100644 --- a/tests/retrieve/test_llama_index_rm.py +++ b/tests/retrieve/test_llama_index_rm.py @@ -92,33 +92,3 @@ def test_lirm_as_rm(rag_setup): assert len(test_res_li) == len( test_res_dspy ), "Ensuring that the results are the same length, a rough equality check of the results" - - -def test_lirm_opt_eval(rag_setup): - """Optimizes and evaluates a module containing a LI retriever""" - - teleprompter = rag_setup.get("tp") - lm = rag_setup.get("lm") - rm = rag_setup.get("rm") - trainset = rag_setup.get("trainset") - devset = rag_setup.get("devset") - - dspy.settings.configure(rm=rm, lm=lm) - - hotpot_qa_eval = Evaluate(devset=devset, num_threads=1) - - # print(hotpot_qa_eval) - - uncompiled_score = hotpot_qa_eval(MockModule(), metric=validate_context_and_answer) - - # print(uncompiled_score) - - compiled_mock_module = teleprompter.compile(MockModule(), teacher=MockModule(), trainset=trainset) - - # print(compiled_mock_module) - - compiled_score = hotpot_qa_eval(compiled_mock_module, metric=validate_context_and_answer) - - # print(compiled_score) - - assert True From d630d8eb2260d937856ccb000751fbe110c73196 Mon Sep 17 00:00:00 2001 From: Patrick Miramontes Date: Fri, 26 Apr 2024 16:05:24 +0000 Subject: [PATCH 3/7] feat(llamaindex): refactored tests --- tests/retrieve/test_llama_index_rm.py | 31 --------------------------- 1 file changed, 31 deletions(-) diff --git a/tests/retrieve/test_llama_index_rm.py b/tests/retrieve/test_llama_index_rm.py index 23f6e37245..3223ad7978 100644 --- a/tests/retrieve/test_llama_index_rm.py +++ 
b/tests/retrieve/test_llama_index_rm.py @@ -12,34 +12,6 @@ from dspy.teleprompt import BootstrapFinetune -class MockQnA(dspy.Signature): - """Answer questions from given context""" - - context = dspy.InputField(desc="Context to answer questions with") - question = dspy.InputField(desc="Question to answer") - answer = dspy.OutputField(desc="Answer to the question") - - -class MockModule(dspy.Module): - """Mock pipeline for testing""" - - def __init__(self): - super().__init__() - - self.retrieve = dspy.Retrieve() - self.generate_answer = dspy.Predict(MockQnA) - - def forward(self, question) -> dspy.Example: - context = self.retrieve(question).passages - - answer = self.generate_answer( - context=context, - question=question, - ).answer - - return dspy.Prediction(context=context, question=question, answer=answer) - - def validate_context_and_answer(example, pred, trace=None): """Copied this from the intro.ipynb""" answer_EM = dspy.evaluate.answer_exact_match(example, pred) @@ -62,14 +34,11 @@ def rag_setup() -> dict: retriever = index.as_retriever() rm = LlamaIndexRM(retriever) - tp = BootstrapFinetune(metric=validate_context_and_answer) - return { "index": index, "retriever": retriever, "rm": rm, "lm": DummyLM(answers=dummyset), - "tp": tp, "trainset": trainset, "devset": devset, } From a2f2bd26eaa7e33b8dd8ce9bde28d061c0461686 Mon Sep 17 00:00:00 2001 From: Patrick Miramontes Date: Sun, 28 Apr 2024 14:53:52 +0000 Subject: [PATCH 4/7] refactor(llama_index_rm): per pr comments --- dspy/retrieve/llama_index_rm.py | 47 ++++++++++++++++++++++----- tests/retrieve/test_llama_index_rm.py | 13 +------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/dspy/retrieve/llama_index_rm.py b/dspy/retrieve/llama_index_rm.py index 0b861bdf40..5a462abb12 100644 --- a/dspy/retrieve/llama_index_rm.py +++ b/dspy/retrieve/llama_index_rm.py @@ -1,3 +1,6 @@ +import logging +from typing import Optional + import dspy try: @@ -6,6 +9,8 @@ err = "The 'llama_index' package is 
required to use LlamaIndexRM. Install it with 'pip install llama_index'." raise ImportError(err) from None +NO_TOP_K_WARNING = "The underlying LlamaIndex retriever does not support top k retrieval. Ignoring k value." + class LlamaIndexRM(dspy.Retrieve): """Implements a retriever which wraps over a LlamaIndex retriever. @@ -19,6 +24,9 @@ class LlamaIndexRM(dspy.Retrieve): Args: retriever (BaseRetriever): A LlamaIndex retriever object - text based only + k (int): Optional; the number of examples to retrieve (similarity_top_k) + + If the underlying LI retriever does not have the property similarity_top_k, k will be ignored. Returns: DSPy RM Object - this is a retriever object that can be used in DSPy @@ -26,32 +34,55 @@ class LlamaIndexRM(dspy.Retrieve): retriever: BaseRetriever - def __init__(self, retriever: BaseRetriever): + def __init__( + self, + retriever: BaseRetriever, + k: Optional[int] = 3, + ): self.retriever = retriever + if hasattr(self.retriever, "similarity_top_k"): + self.retriever.similarity_top_k = k + else: + logging.warning(NO_TOP_K_WARNING) + @property - def similarity_top_k(self) -> int: - """Return similarity top k of retriever.""" + def k(self) -> int: + """Get similarity top k of retriever.""" + if not hasattr(self.retriever, "similarity_top_k"): + logging.warning(NO_TOP_K_WARNING) + return None + return self.retriever.similarity_top_k - @similarity_top_k.setter - def similarity_top_k(self, k: int) -> None: + @k.setter + def k(self, k: int) -> None: """Set similarity top k of retriever.""" - self.retriever.similarity_top_k = k + if hasattr(self.retriever, "similarity_top_k"): + self.retriever.similarity_top_k = k + else: + logging.warning(NO_TOP_K_WARNING) - def forward(self, query: str) -> list[dspy.Example]: + def forward(self, query: str, k: Optional[int] = None) -> list[dspy.Example]: """Forward function for the LI retriever. This is the function that is called to retrieve the top k examples for a given query. 
- Top k is set via the setter similarity_top_k or at LI instantiation. Args: query (str): The query to retrieve examples for + k (int): Optional; the number of examples to retrieve (similarity_top_k) + + If the underlying LI retriever does not have the property similarity_top_k, k will be ignored. Returns: List[dspy.Example]: A list of examples retrieved by the retriever """ + if hasattr(self.retriever, "similarity_top_k") and k: + self.retriever.similarity_top_k = k + else: + logging.warning(NO_TOP_K_WARNING) + raw = self.retriever.retrieve(query) return [ diff --git a/tests/retrieve/test_llama_index_rm.py b/tests/retrieve/test_llama_index_rm.py index 3223ad7978..4bc3c41a4b 100644 --- a/tests/retrieve/test_llama_index_rm.py +++ b/tests/retrieve/test_llama_index_rm.py @@ -7,16 +7,7 @@ import dspy from dsp.modules.dummy_lm import DummyLM from dspy.datasets import HotPotQA -from dspy.evaluate import Evaluate from dspy.retrieve.llama_index_rm import LlamaIndexRM -from dspy.teleprompt import BootstrapFinetune - - -def validate_context_and_answer(example, pred, trace=None): - """Copied this from the intro.ipynb""" - answer_EM = dspy.evaluate.answer_exact_match(example, pred) - answer_PM = dspy.evaluate.answer_passage_match(example, pred) - return answer_EM and answer_PM @pytest.fixture() @@ -58,6 +49,4 @@ def test_lirm_as_rm(rag_setup): assert isinstance(rm, dspy.Retrieve), "Ensuring the RM is a retriever object from dspy" assert isinstance(test_res_dspy, list), "Ensuring the results are a list from the DSPy retriever" - assert len(test_res_li) == len( - test_res_dspy - ), "Ensuring that the results are the same length, a rough equality check of the results" + assert len(test_res_li) == len(test_res_dspy), "Rough equality check of the results" From 5d1989a805880f1a3f87bd94506e83364567a8dd Mon Sep 17 00:00:00 2001 From: Patrick Miramontes Date: Mon, 29 Apr 2024 15:49:29 +0000 Subject: [PATCH 5/7] refactor(llama_index_rm): simplified class properties. 
Updated requirements.txt. --- dspy/retrieve/llama_index_rm.py | 16 ++++++---------- requirements.txt | 1 + 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/dspy/retrieve/llama_index_rm.py b/dspy/retrieve/llama_index_rm.py index 5a462abb12..639d849ba8 100644 --- a/dspy/retrieve/llama_index_rm.py +++ b/dspy/retrieve/llama_index_rm.py @@ -37,17 +37,15 @@ class LlamaIndexRM(dspy.Retrieve): def __init__( self, retriever: BaseRetriever, - k: Optional[int] = 3, + k: Optional[int] = None, ): self.retriever = retriever - if hasattr(self.retriever, "similarity_top_k"): - self.retriever.similarity_top_k = k - else: - logging.warning(NO_TOP_K_WARNING) + if k: + self.k = k @property - def k(self) -> int: + def k(self) -> Optional[int]: """Get similarity top k of retriever.""" if not hasattr(self.retriever, "similarity_top_k"): logging.warning(NO_TOP_K_WARNING) @@ -78,10 +76,8 @@ def forward(self, query: str, k: Optional[int] = None) -> list[dspy.Example]: Returns: List[dspy.Example]: A list of examples retrieved by the retriever """ - if hasattr(self.retriever, "similarity_top_k") and k: - self.retriever.similarity_top_k = k - else: - logging.warning(NO_TOP_K_WARNING) + if k: + self.k = k raw = self.retriever.retrieve(query) diff --git a/requirements.txt b/requirements.txt index e5569d1f3d..bd837e7588 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ backoff datasets joblib<=1.3.2 +llama-index>=0.10.30 openai>=0.28.1,<2.0.0 optuna pandas From 216546b92c463a66f133694249801e050f750002 Mon Sep 17 00:00:00 2001 From: Patrick Miramontes Date: Fri, 10 May 2024 00:35:25 +0000 Subject: [PATCH 6/7] fix(conflicts): resolved li dependency conflicts --- poetry.lock | 134 +++++++++++++++++++++++------------------------ pyproject.toml | 2 +- requirements.txt | 1 - 3 files changed, 68 insertions(+), 69 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3cd0daec9e..10a1c9bccc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -467,7 +467,7 @@ typecheck = 
["mypy"] name = "beautifulsoup4" version = "4.12.3" description = "Screen-scraping library" -optional = false +optional = true python-versions = ">=3.6.0" files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, @@ -973,13 +973,13 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "dataclasses-json" -version = "0.6.4" +version = "0.6.5" description = "Easily serialize dataclasses to and from JSON." -optional = false -python-versions = ">=3.7,<4.0" +optional = true +python-versions = "<4.0,>=3.7" files = [ - {file = "dataclasses_json-0.6.4-py3-none-any.whl", hash = "sha256:f90578b8a3177f7552f4e1a6e535e84293cd5da421fcce0642d49c0d7bdf8df2"}, - {file = "dataclasses_json-0.6.4.tar.gz", hash = "sha256:73696ebf24936560cca79a2430cbc4f3dd23ac7bf46ed17f38e5e5e7657a6377"}, + {file = "dataclasses_json-0.6.5-py3-none-any.whl", hash = "sha256:f49c77aa3a85cac5bf5b7f65f4790ca0d2be8ef4d92c75e91ba0103072788a39"}, + {file = "dataclasses_json-0.6.5.tar.gz", hash = "sha256:1c287594d9fcea72dc42d6d3836cf14848c2dc5ce88f65ed61b36b57f515fe26"}, ] [package.dependencies] @@ -1076,7 +1076,7 @@ files = [ name = "deprecated" version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
-optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, @@ -1108,7 +1108,7 @@ profile = ["gprof2dot (>=2022.7.29)"] name = "dirtyjson" version = "1.0.8" description = "JSON decoder for Python that can extract data from the muck" -optional = false +optional = true python-versions = "*" files = [ {file = "dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"}, @@ -2295,19 +2295,19 @@ adal = ["adal (>=1.0.2)"] [[package]] name = "llama-index" -version = "0.10.30" +version = "0.10.35" description = "Interface between LLMs and your data" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index-0.10.30-py3-none-any.whl", hash = "sha256:7083014be8f74c6037cc33d629985a7505ee3c89b972503f8dd7d4ac7c45a4f4"}, - {file = "llama_index-0.10.30.tar.gz", hash = "sha256:58fab1047c75e39803e8c394fab7d5e81ce97f27281bf7cba6bea40a7328562e"}, + {file = "llama_index-0.10.35-py3-none-any.whl", hash = "sha256:1e30f7dceff5e05cb9bfe8727b767487dfe4f97dc2c71a4ca8276aa983dc6e9e"}, + {file = "llama_index-0.10.35.tar.gz", hash = "sha256:cf1b9ac3b65cc4fd035bfeb0010353a65403aa202d830e21dc7beda7e6284e62"}, ] [package.dependencies] llama-index-agent-openai = ">=0.1.4,<0.3.0" llama-index-cli = ">=0.1.2,<0.2.0" -llama-index-core = ">=0.10.30,<0.11.0" +llama-index-core = ">=0.10.35,<0.11.0" llama-index-embeddings-openai = ">=0.1.5,<0.2.0" llama-index-indices-managed-llama-cloud = ">=0.1.2,<0.2.0" llama-index-legacy = ">=0.9.48,<0.10.0" @@ -2320,17 +2320,17 @@ llama-index-readers-llama-parse = ">=0.1.2,<0.2.0" [[package]] name = "llama-index-agent-openai" -version = "0.2.2" +version = "0.2.4" description = "llama-index agent openai integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - 
{file = "llama_index_agent_openai-0.2.2-py3-none-any.whl", hash = "sha256:fa8cbc2c7be5a465848f8d5b432db01c55f07dfa06357edb7fb77fb17d534d1e"}, - {file = "llama_index_agent_openai-0.2.2.tar.gz", hash = "sha256:12063dd932c74015796f973986cc52d783f51fda38e4ead72a56d0fd195925ee"}, + {file = "llama_index_agent_openai-0.2.4-py3-none-any.whl", hash = "sha256:b05eb7f0331d40a7a2bcaabaa84c9c7ebe6837a72038d03cbb71c083a4301a81"}, + {file = "llama_index_agent_openai-0.2.4.tar.gz", hash = "sha256:cd4a58f8bf233728ceda554cbb34de56a2b6bbbbff6ce801c3f8ff0c8280bf55"}, ] [package.dependencies] -llama-index-core = ">=0.10.1,<0.11.0" +llama-index-core = ">=0.10.35,<0.11.0" llama-index-llms-openai = ">=0.1.5,<0.2.0" openai = ">=1.14.0" @@ -2338,7 +2338,7 @@ openai = ">=1.14.0" name = "llama-index-cli" version = "0.1.12" description = "llama-index cli" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ {file = "llama_index_cli-0.1.12-py3-none-any.whl", hash = "sha256:d80d546786f02d3f16f6183b8e86b22b8b5c33a1500923659f2ccbff8d5df634"}, @@ -2352,13 +2352,13 @@ llama-index-llms-openai = ">=0.1.1,<0.2.0" [[package]] name = "llama-index-core" -version = "0.10.30" +version = "0.10.35.post1" description = "Interface between LLMs and your data" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_core-0.10.30-py3-none-any.whl", hash = "sha256:2f291ce2975f9dbf0ea87d684d3d8122ce216265f468f32baa2cf4ecfb34ed2a"}, - {file = "llama_index_core-0.10.30.tar.gz", hash = "sha256:bed3f683606a0b0eb0839677c935a4b57b7bae509a95d380e51c6225630660e0"}, + {file = "llama_index_core-0.10.35.post1-py3-none-any.whl", hash = "sha256:1c5993946202a9aec86bd6f0943991d1fe443556bd3e6c7b345cb360a46dc6c2"}, + {file = "llama_index_core-0.10.35.post1.tar.gz", hash = "sha256:f62013217bf7c04b6adf9dc2c1b168ff957f924519f19af2f383a0f0c34308e4"}, ] [package.dependencies] @@ -2396,13 +2396,13 @@ query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng 
(>=1.6.0,<2.0.0)", "l [[package]] name = "llama-index-embeddings-openai" -version = "0.1.8" +version = "0.1.9" description = "llama-index embeddings openai integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_embeddings_openai-0.1.8-py3-none-any.whl", hash = "sha256:186be52c29a8c82a39ff6e033e3a61b9c5a17cf6177a5507abe4f5c79fea3a53"}, - {file = "llama_index_embeddings_openai-0.1.8.tar.gz", hash = "sha256:aae4ce3ec2fe2cb604d944646982c71663a8b99ed5f8c370823ee82eef4ddd3b"}, + {file = "llama_index_embeddings_openai-0.1.9-py3-none-any.whl", hash = "sha256:fbd16d6197b91f4dbdc6d0707e573cc224ac2b0a48d5b370c6232dd8a2282473"}, + {file = "llama_index_embeddings_openai-0.1.9.tar.gz", hash = "sha256:0fd292b2f9a0ad4534a790d6374726bc885853188087eb018167dcf239643924"}, ] [package.dependencies] @@ -2410,24 +2410,24 @@ llama-index-core = ">=0.10.1,<0.11.0" [[package]] name = "llama-index-indices-managed-llama-cloud" -version = "0.1.5" +version = "0.1.6" description = "llama-index indices llama-cloud integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_indices_managed_llama_cloud-0.1.5-py3-none-any.whl", hash = "sha256:79f636cb6f4fabb12fec153564110f7f4dfda3cacb087793a5fec988484d7d2c"}, - {file = "llama_index_indices_managed_llama_cloud-0.1.5.tar.gz", hash = "sha256:47cdde9f06bbddd508f0efcf41de425e85171ac2c8fda8a5fb2a89673e1c8c71"}, + {file = "llama_index_indices_managed_llama_cloud-0.1.6-py3-none-any.whl", hash = "sha256:cba33e1a3677b2a2ae7f239119acbf6dc3818f105edc92315729842b56fbc949"}, + {file = "llama_index_indices_managed_llama_cloud-0.1.6.tar.gz", hash = "sha256:74b3b0e9ebf9d348d3054f9fc0c657031acceb9351c31116ad8d5a7ae4729f5c"}, ] [package.dependencies] llama-index-core = ">=0.10.0,<0.11.0" -llamaindex-py-client = ">=0.1.13,<0.2.0" +llamaindex-py-client = ">=0.1.19,<0.2.0" [[package]] name = "llama-index-legacy" version = "0.9.48" description = "Interface 
between LLMs and your data" -optional = false +optional = true python-versions = ">=3.8.1,<4.0" files = [ {file = "llama_index_legacy-0.9.48-py3-none-any.whl", hash = "sha256:714ada95beac179b4acefa4d2deff74bb7b2f22b0f699ac247d4cb67738d16d4"}, @@ -2464,13 +2464,13 @@ query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "l [[package]] name = "llama-index-llms-openai" -version = "0.1.16" +version = "0.1.18" description = "llama-index llms openai integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_llms_openai-0.1.16-py3-none-any.whl", hash = "sha256:4a9c0fe969302731907c8fa31631812397637e114a44ebbad11fd6c59def9315"}, - {file = "llama_index_llms_openai-0.1.16.tar.gz", hash = "sha256:313bbc17c241992430a6bf686a1b1edc4276c8256ad6b0550aa1bea1e0fed1a6"}, + {file = "llama_index_llms_openai-0.1.18-py3-none-any.whl", hash = "sha256:934cf72d10385f1c76c0183b0e94ce1850fab1026287e01b7db0a14c946dfd79"}, + {file = "llama_index_llms_openai-0.1.18.tar.gz", hash = "sha256:8cb7546a1885ba558ff580b114d638569a0aed81a264961114e719bc42b37100"}, ] [package.dependencies] @@ -2480,7 +2480,7 @@ llama-index-core = ">=0.10.24,<0.11.0" name = "llama-index-multi-modal-llms-openai" version = "0.1.5" description = "llama-index multi-modal-llms openai integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ {file = "llama_index_multi_modal_llms_openai-0.1.5-py3-none-any.whl", hash = "sha256:bb332580e7e4b5f2f87488b3649d2ceb53ee82c848e59694578a982c3982ce0b"}, @@ -2493,13 +2493,13 @@ llama-index-llms-openai = ">=0.1.1,<0.2.0" [[package]] name = "llama-index-program-openai" -version = "0.1.5" +version = "0.1.6" description = "llama-index program openai integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_program_openai-0.1.5-py3-none-any.whl", hash = "sha256:20b6efa706ac73e4dc5086900fea1ffcb1eb0787c8a6f081669d37da7235aee0"}, - 
{file = "llama_index_program_openai-0.1.5.tar.gz", hash = "sha256:c33aa2d2876ad0ff1f9a2a755d4e7d4917240847d0174e7b2d0b8474499bb700"}, + {file = "llama_index_program_openai-0.1.6-py3-none-any.whl", hash = "sha256:4660b338503537c5edca1e0dab606af6ce372b4f1b597e2833c6b602447c5d8d"}, + {file = "llama_index_program_openai-0.1.6.tar.gz", hash = "sha256:c6a4980c5ea826088b28b4dee3367edb20221e6d05eb0e05019049190131d772"}, ] [package.dependencies] @@ -2511,7 +2511,7 @@ llama-index-llms-openai = ">=0.1.1,<0.2.0" name = "llama-index-question-gen-openai" version = "0.1.3" description = "llama-index question_gen openai integration" -optional = false +optional = true python-versions = ">=3.8.1,<4.0" files = [ {file = "llama_index_question_gen_openai-0.1.3-py3-none-any.whl", hash = "sha256:1f83b49e8b2e665030d1ec8c54687d6985d9fa8426147b64e46628a9e489b302"}, @@ -2525,13 +2525,13 @@ llama-index-program-openai = ">=0.1.1,<0.2.0" [[package]] name = "llama-index-readers-file" -version = "0.1.19" +version = "0.1.22" description = "llama-index readers file integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_index_readers_file-0.1.19-py3-none-any.whl", hash = "sha256:25eb6d066dc38753de435e876ef8511c68d84102302c053b7dcb0776db68fced"}, - {file = "llama_index_readers_file-0.1.19.tar.gz", hash = "sha256:194c1b9b85c265159b7302c7d80adba937aab06f05c170af7fd95c4e7a8aec35"}, + {file = "llama_index_readers_file-0.1.22-py3-none-any.whl", hash = "sha256:a8d4a69a9ea659c14ebb22ca9a5560b9c7ec6f501e7f68f6c52f591374165376"}, + {file = "llama_index_readers_file-0.1.22.tar.gz", hash = "sha256:37de54ad0cfbdc607c195532b9a292417a4714f57773570b87027b8dc381f0e2"}, ] [package.dependencies] @@ -2547,7 +2547,7 @@ pymupdf = ["pymupdf (>=1.23.21,<2.0.0)"] name = "llama-index-readers-llama-parse" version = "0.1.4" description = "llama-index readers llama-parse integration" -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ {file = 
"llama_index_readers_llama_parse-0.1.4-py3-none-any.whl", hash = "sha256:c4914b37d12cceee56fbd185cca80f87d60acbf8ea7a73f9719610180be1fcdd"}, @@ -2560,13 +2560,13 @@ llama-parse = ">=0.4.0,<0.5.0" [[package]] name = "llama-parse" -version = "0.4.1" +version = "0.4.2" description = "Parse files into RAG-Optimized formats." -optional = false +optional = true python-versions = "<4.0,>=3.8.1" files = [ - {file = "llama_parse-0.4.1-py3-none-any.whl", hash = "sha256:2c08962b66791c61fc360ae2042f953729c7b8decc3590d01fea5a98ca1f6676"}, - {file = "llama_parse-0.4.1.tar.gz", hash = "sha256:d723af84d6a1fc99eb431915d21865d20b76d8a246dbaa124d1f96c956a644f7"}, + {file = "llama_parse-0.4.2-py3-none-any.whl", hash = "sha256:5ce0390141f216dcd88c1123fea7f2a4f561d177f791a97217a3db3509dec4ff"}, + {file = "llama_parse-0.4.2.tar.gz", hash = "sha256:fa04c09730b102155f6505de9cf91998c86d334581f0f12597c5eb47ca5db859"}, ] [package.dependencies] @@ -2574,13 +2574,13 @@ llama-index-core = ">=0.10.29" [[package]] name = "llamaindex-py-client" -version = "0.1.18" +version = "0.1.19" description = "" -optional = false +optional = true python-versions = "<4,>=3.8" files = [ - {file = "llamaindex_py_client-0.1.18-py3-none-any.whl", hash = "sha256:5417e41666504a77ecf5bdd9b403ffff1d714880ee30d49e234fb7686177eeeb"}, - {file = "llamaindex_py_client-0.1.18.tar.gz", hash = "sha256:091ee49a92592e3894777ade12516c2137093f9d6441a549f406461917ce9b7e"}, + {file = "llamaindex_py_client-0.1.19-py3-none-any.whl", hash = "sha256:fd9416fd78b97209bf323bc3c7fab314499778563e7274f10853ad560563d10e"}, + {file = "llamaindex_py_client-0.1.19.tar.gz", hash = "sha256:73f74792bb8c092bae6dc626627a09ac13a099fa8d10f8fcc83e17a2b332cca7"}, ] [package.dependencies] @@ -2770,7 +2770,7 @@ urllib3 = "*" name = "marshmallow" version = "3.21.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "marshmallow-3.21.1-py3-none-any.whl", hash = "sha256:f085493f79efb0644f270a9bf2892843142d80d7174bbbd2f3713f2a589dc633"}, @@ -3330,7 +3330,7 @@ test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] name = "nltk" version = "3.8.1" description = "Natural Language Toolkit" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "nltk-3.8.1-py3-none-any.whl", hash = "sha256:fd5c9109f976fa86bcadba8f91e47f5e9293bd034474752e92a520f81c93dda5"}, @@ -3946,7 +3946,7 @@ numpy = "*" name = "pillow" version = "10.3.0" description = "Python Imaging Library (Fork)" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"}, @@ -4652,7 +4652,7 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "pypdf" version = "4.2.0" description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "pypdf-4.2.0-py3-none-any.whl", hash = "sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1"}, @@ -5515,7 +5515,7 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, @@ -5883,7 +5883,7 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7 name = "striprtf" version = "0.0.26" description = "A simple library to convert rtf to text" -optional = false +optional = true python-versions = "*" files = [ {file = "striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb"}, @@ -5939,7 +5939,7 @@ widechars = ["wcwidth"] name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, @@ -5953,7 +5953,7 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "tiktoken" version = "0.6.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "tiktoken-0.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:277de84ccd8fa12730a6b4067456e5cf72fef6300bea61d506c09e45658d41ac"}, @@ -6386,7 +6386,7 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." -optional = false +optional = true python-versions = "*" files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, @@ -6916,7 +6916,7 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] name = "wrapt" version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." 
-optional = false +optional = true python-versions = ">=3.6" files = [ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, @@ -7240,4 +7240,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "343c8d01b3efc03a1f4ed7b4eccee2cfe5750047a62502a23efd870227ccc74e" +content-hash = "021917cb8150dab55dcee83c0fa4b5e86dcab664e8b093874055228e9011fe94" diff --git a/pyproject.toml b/pyproject.toml index 2c33c8a090..518ada0c40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,7 +118,7 @@ rich = "^13.7.1" psycopg2 = { version = "^2.9.9", optional = true } pgvector = { version = "^0.2.5", optional = true } structlog = "^24.1.0" -llama-index = "^0.10.30" +llama-index = {version = "^0.10.30", optional = true} [tool.poetry.group.dev.dependencies] diff --git a/requirements.txt b/requirements.txt index bd837e7588..e5569d1f3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ backoff datasets joblib<=1.3.2 -llama-index>=0.10.30 openai>=0.28.1,<2.0.0 optuna pandas From 269d9e8822ce73839a85f896db7ca6bcccbb2494 Mon Sep 17 00:00:00 2001 From: Patrick Miramontes Date: Fri, 10 May 2024 00:49:22 +0000 Subject: [PATCH 7/7] fix(dependencies): updated LI tests to be optional --- tests/retrieve/test_llama_index_rm.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/retrieve/test_llama_index_rm.py b/tests/retrieve/test_llama_index_rm.py index 4bc3c41a4b..35711087e9 100644 --- a/tests/retrieve/test_llama_index_rm.py +++ b/tests/retrieve/test_llama_index_rm.py @@ -1,18 +1,27 @@ +import logging + import pytest -from llama_index.core import Settings, VectorStoreIndex -from llama_index.core.base.base_retriever import BaseRetriever -from llama_index.core.embeddings.mock_embed_model import MockEmbedding -from llama_index.core.readers.string_iterable import StringIterableReader import 
dspy from dsp.modules.dummy_lm import DummyLM from dspy.datasets import HotPotQA -from dspy.retrieve.llama_index_rm import LlamaIndexRM + +try: + from llama_index.core import Settings, VectorStoreIndex + from llama_index.core.base.base_retriever import BaseRetriever + from llama_index.core.embeddings.mock_embed_model import MockEmbedding + from llama_index.core.readers.string_iterable import StringIterableReader + + from dspy.retrieve.llama_index_rm import LlamaIndexRM + +except ImportError: + logging.info("Optional dependency llama-index is not installed - skipping LlamaIndexRM tests.") @pytest.fixture() def rag_setup() -> dict: """Builds the necessary fixtures to test LI""" + pytest.importorskip("llama_index.core") dataset = HotPotQA(train_seed=1, train_size=8, eval_seed=2023, dev_size=4, test_size=0) trainset = [x.with_inputs("question") for x in dataset.train] devset = [x.with_inputs("question") for x in dataset.dev] @@ -37,7 +46,7 @@ def rag_setup() -> dict: def test_lirm_as_rm(rag_setup): """Test the retriever as retriever method""" - + pytest.importorskip("llama_index.core") retriever = rag_setup.get("retriever") test_res_li = retriever.retrieve("At My Window was released by which American singer-songwriter?") rm = rag_setup.get("rm")