From d7b41cd7299e1bdf91f36c124d172d6807444a69 Mon Sep 17 00:00:00 2001 From: alejandro Date: Wed, 17 Apr 2024 02:51:17 +0000 Subject: [PATCH 01/24] feat(dspy): testing initial commit --- poetry.lock | 65 +++++++++++++++++++++++++------------------------- pyproject.toml | 1 + 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/poetry.lock b/poetry.lock index d92a6a00e4..e760840c18 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -201,7 +201,7 @@ trio = ["trio (>=0.23)"] name = "appnope" version = "0.1.4" description = "Disable App Nap on macOS >= 10.9" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"}, @@ -269,7 +269,7 @@ tests = ["pytest"] name = "asttokens" version = "2.4.1" description = "Annotate AST trees with source code positions" -optional = true +optional = false python-versions = "*" files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, @@ -597,7 +597,7 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, @@ -904,7 +904,7 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"] name = "comm" version = "0.2.2" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, @@ -1019,7 +1019,7 @@ vision = ["Pillow (>=6.2.1)"] name = "debugpy" version = "1.8.1" description = "An implementation of the Debug Adapter Protocol for Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "debugpy-1.8.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741"}, @@ -1050,7 +1050,7 @@ files = [ name = "decorator" version = "5.1.1" description = "Decorators for Humans" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, @@ -1181,7 +1181,7 @@ test = ["pytest (>=6)"] name = "executing" version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, @@ -1996,13 +1996,13 @@ files = [ [[package]] name = "ipykernel" -version = "6.29.3" +version = "6.29.4" description = "IPython Kernel for Jupyter" -optional = true +optional = false python-versions = ">=3.8" files = [ - {file = "ipykernel-6.29.3-py3-none-any.whl", hash = "sha256:5aa086a4175b0229d4eca211e181fb473ea78ffd9869af36ba7694c947302a21"}, - {file = "ipykernel-6.29.3.tar.gz", hash = "sha256:e14c250d1f9ea3989490225cc1a542781b095a18a19447fcf2b5eaf7d0ac5bd2"}, + {file = "ipykernel-6.29.4-py3-none-any.whl", hash = "sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da"}, + {file = "ipykernel-6.29.4.tar.gz", hash = "sha256:3d44070060f9475ac2092b760123fadf105d2e2493c24848b6691a7c4f42af5c"}, ] [package.dependencies] @@ 
-2031,7 +2031,7 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio name = "ipython" version = "8.18.1" description = "IPython: Productive Interactive Computing" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, @@ -2082,7 +2082,7 @@ six = "*" name = "jedi" version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, @@ -2202,7 +2202,7 @@ testing = ["coverage", "ipykernel", "jupytext", "matplotlib", "nbdime", "nbforma name = "jupyter-client" version = "8.6.1" description = "Jupyter protocol implementation and client libraries" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jupyter_client-8.6.1-py3-none-any.whl", hash = "sha256:3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f"}, @@ -2225,7 +2225,7 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt name = "jupyter-core" version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, @@ -2484,7 +2484,7 @@ tests = ["pytest", "pytz", "simplejson"] name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"}, @@ -2996,7 +2996,7 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] name = "nest-asyncio" version = "1.6.0" description = "Patch asyncio to allow nested event loops" -optional = true +optional = false python-versions = ">=3.5" files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, @@ -3564,7 +3564,7 @@ xml = ["lxml (>=4.9.2)"] name = "parso" version = "0.8.3" description = "A Python Parser" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "parso-0.8.3-py2.py3-none-any.whl", hash = "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75"}, @@ -3590,7 +3590,7 @@ files = [ name = "pexpect" version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." 
-optional = true +optional = false python-versions = "*" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, @@ -3732,7 +3732,7 @@ virtualenv = ">=20.10.0" name = "prompt-toolkit" version = "3.0.43" description = "Library for building powerful interactive command lines in Python" -optional = true +optional = false python-versions = ">=3.7.0" files = [ {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"}, @@ -3766,7 +3766,7 @@ files = [ name = "psutil" version = "5.9.8" description = "Cross-platform lib for process and system monitoring in Python." -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, @@ -3816,7 +3816,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -optional = true +optional = false python-versions = "*" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -3874,7 +3874,7 @@ functions = ["apache-bookkeeper-client (>=4.16.1)", "grpcio (>=1.60.0)", "promet name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" -optional = true +optional = false python-versions = "*" files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, @@ -3983,7 +3983,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycparser" version = "2.21" description = "C parser in Python" -optional = true +optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "pycparser-2.21-py2.py3-none-any.whl", hash = 
"sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, @@ -4338,7 +4338,7 @@ files = [ name = "pywin32" version = "306" description = "Python for Window Extensions" -optional = true +optional = false python-versions = "*" files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, @@ -4382,6 +4382,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -4434,7 +4435,7 @@ pyyaml = "*" name = "pyzmq" version = "25.1.2" description = "Python bindings for 0MQ" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "pyzmq-25.1.2-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:e624c789359f1a16f83f35e2c705d07663ff2b4d4479bad35621178d8f0f6ea4"}, @@ -5410,7 +5411,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -optional = true +optional = false 
python-versions = "*" files = [ {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, @@ -5713,7 +5714,7 @@ optree = ["optree (>=0.9.1)"] name = "tornado" version = "6.4" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -optional = true +optional = false python-versions = ">= 3.8" files = [ {file = "tornado-6.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0"}, @@ -5753,7 +5754,7 @@ telegram = ["requests"] name = "traitlets" version = "5.14.2" description = "Traitlets Python configuration system" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "traitlets-5.14.2-py3-none-any.whl", hash = "sha256:fcdf85684a772ddeba87db2f398ce00b40ff550d1528c03c14dbf6a02003cd80"}, @@ -6263,7 +6264,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = true +optional = false python-versions = "*" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, @@ -6730,4 +6731,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "3988e0bedd832c87fda15828d8c6f08b2c3a9e75a9bca6d4201c5b8bdf5e3c9e" +content-hash = "54031010d98226a2f2d449d46e67a48e72927a8bee64c0493c3c2de0001663b4" diff --git a/pyproject.toml b/pyproject.toml index f99df941a0..9a998a3065 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,6 +128,7 @@ pytest-mock = "^3.12.0" ruff = "^0.3.0" black = "^24.2.0" pre-commit = "^3.7.0" +ipykernel = "^6.29.4" [tool.poetry.extras] chromadb = ["chromadb"] From dfae177731d33b84bc0430b9f8ec44e8fe643696 Mon Sep 17 00:00:00 2001 From: alejandro Date: Wed, 17 Apr 2024 18:25:34 +0000 Subject: [PATCH 02/24] 
feat(dspy): initial cleanup of Snowflake (LM)support --- dsp/modules/snowflake.py | 142 +++++++++++++++++++++++++++++++++++++++ dspy/__init__.py | 3 +- 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 dsp/modules/snowflake.py diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflake.py new file mode 100644 index 0000000000..3cc7c4aa5d --- /dev/null +++ b/dsp/modules/snowflake.py @@ -0,0 +1,142 @@ +"""Module for interacting with Snowflake Cortex.""" +import json +from typing import Any + +import backoff +from pydantic_core import PydanticCustomError + +from dsp.modules.lm import LM + +try: + from snowflake.snowpark import Session + from snowflake.snowpark import functions as snow_func + +except ImportError: + pass + + +def backoff_hdlr(details) -> None: + """Handler from https://pypi.org/project/backoff .""" + print( + f"Backing off {details['wait']:0.1f} seconds after {details['tries']} tries ", + f"calling function {details['target']} with kwargs", + f"{details['kwargs']}", + ) + + +def giveup_hdlr(details) -> bool: + """Wrapper function that decides when to give up on retry.""" + if "rate limits" in str(details): + return False + return True + + +class Snowflake(LM): + """Wrapper around Snowflake's CortexAPI. + + Currently supported models include 'mistral-large','reka-flash','mixtral-8x7b', + 'llama2-70b-chat','mistral-7b','gemma-7b'. + """ + + def __init__(self, model: str = "mistral-large", credentials=None, **kwargs): + """Parameters + + ---------- + model : str + Which pre-trained model from Snowflake to use? + Choices are 'mistral-large','reka-flash','mixtral-8x7b','llama2-70b-chat','mistral-7b','gemma-7b' + Full list of supported models is available here: https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions#complete + **kwargs: dict + Additional arguments to pass to the API provider. 
+ """ + super().__init__(model) + + self.model = model + cortex_models = ["mistral-large", "reka-flash", "mixtral-8x7b", "llama2-70b-chat", "mistral-7b", "gemma-7b"] + + if model in cortex_models: + self.available_args = { + "max_tokens", + "temperature", + "top_p", + } + else: + raise PydanticCustomError( + "model", + 'model name is not valid, got "{model_name}"', + ) + + self.client = self._init_cortex(credentials=credentials) + self.provider = "Snowflake" + self.kwargs = { + **self.kwargs, + "temperature": 0.7, + "max_output_tokens": 1024, + "top_p": 1.0, + "top_k": 1, + **kwargs, + } + + @classmethod + def _init_cortex(cls, credentials: dict) -> None: + return Session.builder.configs(credentials).create() + + def _prepare_params( + self, + parameters: Any, + ) -> dict: + params_mapping = {"n": "candidate_count", "max_tokens": "max_output_tokens"} + params = {params_mapping.get(k, k): v for k, v in parameters.items()} + params = {**self.kwargs, **params} + return {k: params[k] for k in set(params.keys()) & self.available_args} + + def _cortex_complete_request(self, prompt: str, **kwargs) -> dict: + complete = snow_func.builtin("snowflake.cortex.complete") + cortex_complete_args = complete( + snow_func.lit(self.model), + snow_func.lit([{"role": "user", "content": prompt}]), + snow_func.lit(kwargs), + ) + raw_response = self.client.range(1).withColumn("complete_cal", cortex_complete_args).collect()[0].COMPLETE_CAL + + return json.loads(raw_response) + + def basic_request(self, prompt: str, **kwargs) -> list: + raw_kwargs = kwargs + kwargs = self._prepare_params(raw_kwargs) + + response = self._cortex_complete_request(prompt, **kwargs) + + history = { + "prompt": prompt, + "response": { + "prompt": prompt, + "choices": [{"text": c} for c in response["choices"]], + }, + "kwargs": kwargs, + "raw_kwargs": raw_kwargs, + } + + self.history.append(history) + + return [i["text"]["messages"] for i in history["response"]["choices"]] + + @backoff.on_exception( + 
backoff.expo, + (Exception), + max_time=1000, + on_backoff=backoff_hdlr, + giveup=giveup_hdlr, + ) + def _request(self, prompt: str, **kwargs): + """Handles retrieval of completions from Snowflake Cortex whilst handling API errors.""" + return self.basic_request(prompt, **kwargs) + + def __call__( + self, + prompt: str, + only_completed: bool = True, + return_sorted: bool = False, + **kwargs, + ): + return self._request(prompt, **kwargs) diff --git a/dspy/__init__.py b/dspy/__init__.py index 2d5ba4f003..be397521d4 100644 --- a/dspy/__init__.py +++ b/dspy/__init__.py @@ -5,10 +5,10 @@ from .primitives import * from .retrieve import * from .signatures import * +from .utils.logging import logger, set_log_level, set_log_output # Functional must be imported after primitives, predict and signatures from .functional import * # isort: skip -from .utils.logging import logger, set_log_level, set_log_output settings = dsp.settings @@ -25,6 +25,7 @@ Google = dsp.Google GoogleVertexAI = dsp.GoogleVertexAI GROQ = dsp.GroqLM +Snowflake = dsp.Snowflake HFClientTGI = dsp.HFClientTGI HFClientVLLM = HFClientVLLM From e492cfaca892aacd564461123cbeccc967dbd411 Mon Sep 17 00:00:00 2001 From: alejandro Date: Wed, 17 Apr 2024 18:54:08 +0000 Subject: [PATCH 03/24] feat(dspy): added SnowflakeRM retriever and cleanup of Snowflake (LM) support --- dspy/retrieve/snowflake_rm.py | 108 ++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 dspy/retrieve/snowflake_rm.py diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py new file mode 100644 index 0000000000..ba807e132e --- /dev/null +++ b/dspy/retrieve/snowflake_rm.py @@ -0,0 +1,108 @@ +from typing import Optional, Union + +import dspy +from dsp.utils import dotdict + +try: + from snowflake.snowpark import Session + from snowflake.snowpark import functions as snow_fn + from snowflake.snowpark.functions import lit, vector_cosine_distance + from snowflake.snowpark.types import VectorType + 
+except ImportError: + raise ImportError( + "The 'snowflake' extra is required to use SnowflakeCortexRM.:" + "Install it with `pip install snowflake-snowpark-python`", + ) + + +class SnowflakeRM(dspy.Retrieve): + """A retrieval module that uses Weaviate to return the top passages for a given query. + + Assumes that a Snowflake table has been created and populated with the following payload: + - content: The text of the passage + + Args: + snowflake_credentials: connection parameters for initializing Snowlfake client. + snowflake_table_name (str): The name of the Snowflake table containing document embeddings. + embeddings_field (str): The field in the snowlake table with the content embeddings + embeddings_text_field (str): The field in the snowlake table with the content. + k (int, optional): The default number of top passages to retrieve. Defaults to 3. + """ + + def __init__( + self, + snowflake_table_name: str, + snowflake_credentials: dict, + k: int = 3, + embeddings_field: str = "chunk_vec", + embeddings_text_field: str = "chunk", + embeddings_model: str = "e5-base-v2", + ): + self.snowflake_table_name = snowflake_table_name + self.embeddings_field = embeddings_field + self.embeddings_text_field = embeddings_text_field + self.embeddings_model = embeddings_model + self.client = Session.builder.configs(snowflake_credentials).create() + + super().__init__(k=k) + + def forward(self, query_or_queries: Union[str, list[str]], k: Optional[int] = None) -> dspy.Prediction: + """Search Snowflake document embeddings table for self.k top passages for query. + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + + Returns: + dspy.Prediction: An object containing the retrieved passages. 
+ """ + k = k if k is not None else self.k + queries = [query_or_queries] if isinstance(query_or_queries, str) else query_or_queries + queries = [q for q in queries if q] + passages = [] + + for query in queries: + query_embeddings = self._get_embeddings(query) + top_k_chunks = self._top_k_similar_chunks(query_embeddings, k) + + passages.extend(dotdict({"long_text": passage[0]}) for passage in top_k_chunks) + + return passages + + def _top_k_similar_chunks(self, query_embeddings, k): + """Search Snowflake table for self.k top passages for query. + + Args: + query_embeddings(List[float]]): the embeddings for the query of interest + doc_table + k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + doc_table_value = self.embeddings_field + doc_table_key = self.embeddings_text_field + + doc_embeddings = self.client.table(self.snowflake_table_name) + top_k = ( + doc_embeddings.select( + doc_table_value, + doc_table_key, + vector_cosine_distance( + doc_embeddings.col(doc_table_value), + lit(query_embeddings).cast(VectorType(float, len(query_embeddings))), + ).as_("dist"), + ) + .sort("dist") + .limit(k) + ) + + return top_k.select(doc_table_key).to_pandas().values + + def _get_embeddings(self, query: str) -> list[float]: + # create embeddings for the query + embed = snow_fn.builtin("snowflake.cortex.embed_text") + cortex_embed_args = embed(snow_fn.lit(self.embeddings_model), snow_fn.lit(query)) + + return self.client.range(1).withColumn("complete_cal", cortex_embed_args).collect()[0].COMPLETE_CAL From f2ce44fa02fb38d0523885cde31ed1baf44bc62a Mon Sep 17 00:00:00 2001 From: alejandro Date: Wed, 17 Apr 2024 19:06:31 +0000 Subject: [PATCH 04/24] feat(dspy): added SnowflakeRM retriever and cleanup of Snowflake (LM) support --- pyproject.toml | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 9a998a3065..0b09881c68 
100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,6 +118,7 @@ rich = "^13.7.1" psycopg2 = {version = "^2.9.9", optional = true} pgvector = {version = "^0.2.5", optional = true} structlog = "^24.1.0" +snowflake-snowpark-python = { version = "*",optional=true} [tool.poetry.group.dev.dependencies] diff --git a/setup.py b/setup.py index 7b2438131b..bac100c6ff 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ "faiss-cpu": ["sentence_transformers", "faiss-cpu"], "milvus": ["pymilvus~=2.3.7"], "google-vertex-ai": ["google-cloud-aiplatform==1.43.0"], + "snowflake" : ["snowflake-snowpark-python"] }, classifiers=[ "Development Status :: 3 - Alpha", From 6af2d47d05f182e18fdb715f79cc9c152adad2c2 Mon Sep 17 00:00:00 2001 From: alejandro Date: Wed, 17 Apr 2024 19:08:29 +0000 Subject: [PATCH 05/24] feat(dspy): added SnowflakeRM retriever and cleanup of Snowflake (LM) support --- setup.py | 68 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/setup.py b/setup.py index bac100c6ff..36b3e6e41c 100644 --- a/setup.py +++ b/setup.py @@ -1,45 +1,45 @@ from setuptools import find_packages, setup -# Read the content of the README file -with open('README.md', encoding='utf-8') as f: - long_description = f.read() +# Read the content of the README file +with open("README.md", encoding="utf-8") as f: + long_description = f.read() -# Read the content of the requirements.txt file -with open('requirements.txt', encoding='utf-8') as f: - requirements = f.read().splitlines() +# Read the content of the requirements.txt file +with open("requirements.txt", encoding="utf-8") as f: + requirements = f.read().splitlines() -setup( - name="dspy-ai", - version="2.4.3", - description="DSPy", - long_description=long_description, - long_description_content_type='text/markdown', - url="https://github.com/stanfordnlp/dsp", - author="Omar Khattab", - author_email="okhattab@stanford.edu", - license="MIT License", - 
packages=find_packages(include=['dsp.*', 'dspy.*', 'dsp', 'dspy']), - python_requires='>=3.9', - install_requires=requirements, +setup( + name="dspy-ai", + version="2.4.3", + description="DSPy", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/stanfordnlp/dsp", + author="Omar Khattab", + author_email="okhattab@stanford.edu", + license="MIT License", + packages=find_packages(include=["dsp.*", "dspy.*", "dsp", "dspy"]), + python_requires=">=3.9", + install_requires=requirements, extras_require={ "chromadb": ["chromadb~=0.4.14"], "qdrant": ["qdrant-client", "fastembed"], "marqo": ["marqo~=3.1.0"], - "mongodb": ["pymongo~=3.12.0"], - "pinecone": ["pinecone-client~=2.2.4"], - "weaviate": ["weaviate-client~=3.26.1"], + "mongodb": ["pymongo~=3.12.0"], + "pinecone": ["pinecone-client~=2.2.4"], + "weaviate": ["weaviate-client~=3.26.1"], "faiss-cpu": ["sentence_transformers", "faiss-cpu"], "milvus": ["pymilvus~=2.3.7"], "google-vertex-ai": ["google-cloud-aiplatform==1.43.0"], - "snowflake" : ["snowflake-snowpark-python"] - }, - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - ], -) + "snowflake": ["snowflake-snowpark-python"], + }, + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + ], +) From f42df77df10b5a6c41cce1ee67d28b98875e1479 Mon Sep 17 00:00:00 2001 From: alejandro Date: Wed, 17 Apr 2024 20:55:09 +0000 Subject: [PATCH 06/24] feat(dspy): added SnowflakeRM retriever and cleanup of Snowflake (LM) 
support --- dsp/modules/{snowflake.py => snowflakecortex.py} | 2 +- dspy/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename dsp/modules/{snowflake.py => snowflakecortex.py} (99%) diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflakecortex.py similarity index 99% rename from dsp/modules/snowflake.py rename to dsp/modules/snowflakecortex.py index 3cc7c4aa5d..2eb6b204c6 100644 --- a/dsp/modules/snowflake.py +++ b/dsp/modules/snowflakecortex.py @@ -31,7 +31,7 @@ def giveup_hdlr(details) -> bool: return True -class Snowflake(LM): +class SnowflakeCortex(LM): """Wrapper around Snowflake's CortexAPI. Currently supported models include 'mistral-large','reka-flash','mixtral-8x7b', diff --git a/dspy/__init__.py b/dspy/__init__.py index be397521d4..0a1ada3110 100644 --- a/dspy/__init__.py +++ b/dspy/__init__.py @@ -25,7 +25,7 @@ Google = dsp.Google GoogleVertexAI = dsp.GoogleVertexAI GROQ = dsp.GroqLM -Snowflake = dsp.Snowflake +SnowflakeCortex = dsp.SnowflakeCortex HFClientTGI = dsp.HFClientTGI HFClientVLLM = HFClientVLLM From 8ce13780e72e5406a4ad640b9ad8a6e28c939e82 Mon Sep 17 00:00:00 2001 From: alejandro Date: Wed, 17 Apr 2024 21:24:26 +0000 Subject: [PATCH 07/24] feat(dspy): added Snowflake Cortex (LM) and Snowflake RM support --- dsp/modules/__init__.py | 1 + dsp/modules/{snowflakecortex.py => snowflake.py} | 2 +- dspy/__init__.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) rename dsp/modules/{snowflakecortex.py => snowflake.py} (99%) diff --git a/dsp/modules/__init__.py b/dsp/modules/__init__.py index 948844a391..a57515e940 100644 --- a/dsp/modules/__init__.py +++ b/dsp/modules/__init__.py @@ -22,3 +22,4 @@ from .pyserini import * from .sbert import * from .sentence_vectorizer import * +from .snowflake import * diff --git a/dsp/modules/snowflakecortex.py b/dsp/modules/snowflake.py similarity index 99% rename from dsp/modules/snowflakecortex.py rename to dsp/modules/snowflake.py index 2eb6b204c6..3cc7c4aa5d 100644 --- 
a/dsp/modules/snowflakecortex.py +++ b/dsp/modules/snowflake.py @@ -31,7 +31,7 @@ def giveup_hdlr(details) -> bool: return True -class SnowflakeCortex(LM): +class Snowflake(LM): """Wrapper around Snowflake's CortexAPI. Currently supported models include 'mistral-large','reka-flash','mixtral-8x7b', diff --git a/dspy/__init__.py b/dspy/__init__.py index 0a1ada3110..be397521d4 100644 --- a/dspy/__init__.py +++ b/dspy/__init__.py @@ -25,7 +25,7 @@ Google = dsp.Google GoogleVertexAI = dsp.GoogleVertexAI GROQ = dsp.GroqLM -SnowflakeCortex = dsp.SnowflakeCortex +Snowflake = dsp.Snowflake HFClientTGI = dsp.HFClientTGI HFClientVLLM = HFClientVLLM From bf9247f08937089eccc5fbdba037420afe169472 Mon Sep 17 00:00:00 2001 From: alejandro Date: Mon, 6 May 2024 13:32:53 +0000 Subject: [PATCH 08/24] fix(dspy): removing unnecessary ipykernel dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cfbf7d0659..ef683f595d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,7 +129,6 @@ pytest-mock = "^3.12.0" ruff = "^0.3.0" black = "^24.2.0" pre-commit = "^3.7.0" -ipykernel = "^6.29.4" [tool.poetry.extras] chromadb = ["chromadb"] From 4ed07531e5842f9f437722d7c93099613353398b Mon Sep 17 00:00:00 2001 From: alejandro Date: Mon, 6 May 2024 13:43:15 +0000 Subject: [PATCH 09/24] fix(dspy): updating import error handling language for SnowflakeRM --- dspy/retrieve/snowflake_rm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index ba807e132e..569383d0bf 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -11,8 +11,7 @@ except ImportError: raise ImportError( - "The 'snowflake' extra is required to use SnowflakeCortexRM.:" - "Install it with `pip install snowflake-snowpark-python`", + "The snowflake-snowpark-python library is required to use SnowflakeRM. 
Install it with dspy-ai[snowflake]" ) From 315d71abc994bbf991420d73e4ba9fc47f28d5be Mon Sep 17 00:00:00 2001 From: alejandro Date: Mon, 6 May 2024 13:49:21 +0000 Subject: [PATCH 10/24] fix(dspy): updating README to include Snowflake as optional supported extras dependency --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a9cfc6d4dd..a15661a71f 100644 --- a/README.md +++ b/README.md @@ -72,11 +72,11 @@ Or open our intro notebook in Google Colab: [ Date: Mon, 6 May 2024 13:55:46 +0000 Subject: [PATCH 12/24] fix(dspy): updating sort order to be descending for retriever results --- dspy/retrieve/snowflake_rm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index db6355acf3..defa48ed2b 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -93,7 +93,7 @@ def _top_k_similar_chunks(self, query_embeddings, k): lit(query_embeddings).cast(VectorType(float, len(query_embeddings))), ).as_("dist"), ) - .sort("dist") + .sort("dist",ascending=False) .limit(k) ) From c663444fec6ed20f61171c3d5af15cac84a04b77 Mon Sep 17 00:00:00 2001 From: alejandro Date: Mon, 6 May 2024 14:29:28 +0000 Subject: [PATCH 13/24] docs(dspy): Adding documentation for Snowflake LM --- docs/api/language_model_clients/Snowflake.md | 32 ++++++++++++++++++++ dsp/modules/snowflake.py | 6 ++-- 2 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 docs/api/language_model_clients/Snowflake.md diff --git a/docs/api/language_model_clients/Snowflake.md b/docs/api/language_model_clients/Snowflake.md new file mode 100644 index 0000000000..b0eb5725fc --- /dev/null +++ b/docs/api/language_model_clients/Snowflake.md @@ -0,0 +1,32 @@ +--- +sidebar_position: +--- + +# dspy.Snowflake + +### Usage + +```python +lm = dspy.Snowflake(model="mixtral-8x7b",credentials=connection_parameters) +``` + +### Constructor + +The constructor inherits from 
the base class `LM` and verifies the `credentials` for using the Snowflake API. + +```python +class Snowflake(LM): + def __init__( + self, + model, + credentials, + **kwargs): +``` + +**Parameters:** +- `model` (_str_): model hosted by [Snowflake Cortex](https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions#availability). +- `credentials` (_dict_): connection parameters required to initialize a [Snowflake Snowpark session](https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session). + +### Methods + +Refer to [`dspy.Snowflake`](https://dspy-docs.vercel.app/api/language_model_clients/Snowflake) documentation. diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflake.py index 3cc7c4aa5d..0d53ca8d3c 100644 --- a/dsp/modules/snowflake.py +++ b/dsp/modules/snowflake.py @@ -34,7 +34,7 @@ def giveup_hdlr(details) -> bool: class Snowflake(LM): """Wrapper around Snowflake's CortexAPI. - Currently supported models include 'mistral-large','reka-flash','mixtral-8x7b', + Currently supported models include 'snowflake-arctic','mistral-large','reka-flash','mixtral-8x7b', 'llama2-70b-chat','mistral-7b','gemma-7b'. """ @@ -44,7 +44,7 @@ def __init__(self, model: str = "mistral-large", credentials=None, **kwargs): ---------- model : str Which pre-trained model from Snowflake to use? - Choices are 'mistral-large','reka-flash','mixtral-8x7b','llama2-70b-chat','mistral-7b','gemma-7b' + Choices are 'snowflake-arctic','mistral-large','reka-flash','mixtral-8x7b','llama2-70b-chat','mistral-7b','gemma-7b' Full list of supported models is available here: https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions#complete **kwargs: dict Additional arguments to pass to the API provider. 
@@ -52,7 +52,7 @@ def __init__(self, model: str = "mistral-large", credentials=None, **kwargs): super().__init__(model) self.model = model - cortex_models = ["mistral-large", "reka-flash", "mixtral-8x7b", "llama2-70b-chat", "mistral-7b", "gemma-7b"] + cortex_models = ["snowflake-arctic","mistral-large", "reka-flash", "mixtral-8x7b", "llama2-70b-chat", "mistral-7b", "gemma-7b"] if model in cortex_models: self.available_args = { From 2cb77df507f507eb2029b096b91c9d39d59c15de Mon Sep 17 00:00:00 2001 From: alejandro Date: Mon, 6 May 2024 14:59:38 +0000 Subject: [PATCH 14/24] docs(dspy): Adding documentation for Snowflake RM --- .../retrieval_model_clients/SnowflakeRM.md | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 docs/api/retrieval_model_clients/SnowflakeRM.md diff --git a/docs/api/retrieval_model_clients/SnowflakeRM.md b/docs/api/retrieval_model_clients/SnowflakeRM.md new file mode 100644 index 0000000000..75a55e73d4 --- /dev/null +++ b/docs/api/retrieval_model_clients/SnowflakeRM.md @@ -0,0 +1,78 @@ +--- +sidebar_position: +--- + +# retrieve.SnowflakeRM + +### Constructor + +Initialize an instance of the `SnowflakeRM` class, with the option to use `e5-base-v2` embeddings or any Snowflake Cortex supported embeddings model. + +```python +SnowflakeRM( + snowflake_table_name: str, + snowflake_credentials: dict, + k: int = 3, + embeddings_field: str, + embeddings_text_field:str, + embeddings_model: str = "e5-base-v2", +) +``` + +**Parameters:** +- `snowflake_table_name (str)`: The name of the Snowflake table containing embeddings. +- `snowflake_credentials (dict)`: The connection parameters needed to initialize a Snowflake Snowpark Session. +- `k (int, optional)`: The number of top passages to retrieve. Defaults to 3. +- `embeddings_field (str)`: The name of the column in the snowflake table containing the embeddings. +- `embeddings_text_field (str)`: The function to convert a list of text to embeddings. 
+ The embedding function should take a list of text strings as input and output a list of embeddings. + Defaults to None. By default, it will get OpenAI client by the environment variable OPENAI_API_KEY and use OpenAI's embedding model "text-embedding-3-small" with the default dimension. + +### Methods + +#### `forward(self, query_or_queries: Union[str, List[str]], k: Optional[int] = None) -> dspy.Prediction` + +Search the Milvus collection for the top `k` passages matching the given query or queries, using embeddings generated via the default `e5-base-v2` model or the specified `embedding_function`. + +**Parameters:** +- `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. +- `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization. + +**Returns:** +- `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"id": str, "score": float, "long_text": str, "metadatas": dict }]` + +### Quickstart + +To support passage retrieval, it assumes that a Snowflake table has been created and populated with the passages in a column `embeddings_text_field` and the embeddings in another column `embeddings_field` + +SnowflakeRM uses `e5-base-v2` embeddings model by default or any Snowflake Cortex supported embeddings model. 
+ +#### Default `e5-base-v2` Embeddings + +```python +from dspy.retrieve.snowflake_rm import SnowflakeRM +import os + +connection_parameters = { + + "account": os.getenv('SNOWFLAKE_ACCOUNT'), + "user": os.getenv('SNOWFLAKE_USER'), + "password": os.getenv('SNOWFLAKE_PASSWORD'), + "role": os.getenv('SNOWFLAKE_ROLE'), + "warehouse": os.getenv('SNOWFLAKE_WAREHOUSE'), + "database": os.getenv('SNOWFLAKE_DATABASE'), + "schema": os.getenv('SNOWFLAKE_SCHEMA')} + +retriever_model = SnowflakeRM( + snowflake_table_name="", + snowflake_credentials=connection_parameters, + embeddings_field="", + embeddings_text_field= "" + ) + +results = retriever_model("Explore the meaning of life", k=5) + +for result in results: + print("Document:", result.long_text, "\n") +``` + From daef34e9a36ab31e7b27df47368d7400ce4c19e1 Mon Sep 17 00:00:00 2001 From: alejandro Date: Mon, 6 May 2024 15:09:47 +0000 Subject: [PATCH 15/24] docs(dspy): Adding documentation for Snowflake RM --- docs/api/retrieval_model_clients/SnowflakeRM.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/api/retrieval_model_clients/SnowflakeRM.md b/docs/api/retrieval_model_clients/SnowflakeRM.md index 75a55e73d4..e3808673db 100644 --- a/docs/api/retrieval_model_clients/SnowflakeRM.md +++ b/docs/api/retrieval_model_clients/SnowflakeRM.md @@ -23,16 +23,15 @@ SnowflakeRM( - `snowflake_table_name (str)`: The name of the Snowflake table containing embeddings. - `snowflake_credentials (dict)`: The connection parameters needed to initialize a Snowflake Snowpark Session. - `k (int, optional)`: The number of top passages to retrieve. Defaults to 3. -- `embeddings_field (str)`: The name of the column in the snowflake table containing the embeddings. -- `embeddings_text_field (str)`: The function to convert a list of text to embeddings. - The embedding function should take a list of text strings as input and output a list of embeddings. - Defaults to None.
By default, it will get OpenAI client by the environment variable OPENAI_API_KEY and use OpenAI's embedding model "text-embedding-3-small" with the default dimension. +- `embeddings_field (str)`: The name of the column in the Snowflake table containing the embeddings. +- `embeddings_text_field (str)`: The name of the column in the Snowflake table containing the passages. +- `embeddings_model (str)`: The Snowflake Cortex model used to convert text to embeddings. Defaults to `e5-base-v2`. ### Methods #### `forward(self, query_or_queries: Union[str, List[str]], k: Optional[int] = None) -> dspy.Prediction` -Search the Milvus collection for the top `k` passages matching the given query or queries, using embeddings generated via the default `e5-base-v2` model or the specified `embedding_function`. +Search the Snowflake table for the top `k` passages matching the given query or queries, using embeddings generated via the default `e5-base-v2` model or the specified `embedding_model`. **Parameters:** - `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. - `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization.
From 4f1065848af696e2469f55ce0676240405635a06 Mon Sep 17 00:00:00 2001 From: alejandro Date: Mon, 6 May 2024 15:14:51 +0000 Subject: [PATCH 16/24] fix(dspy): Adding self.history definition to Snowflake LM --- dsp/modules/snowflake.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflake.py index 0d53ca8d3c..ef46ef2a28 100644 --- a/dsp/modules/snowflake.py +++ b/dsp/modules/snowflake.py @@ -68,6 +68,7 @@ def __init__(self, model: str = "mistral-large", credentials=None, **kwargs): self.client = self._init_cortex(credentials=credentials) self.provider = "Snowflake" + self.history: list[dict[str, Any]] = [] self.kwargs = { **self.kwargs, "temperature": 0.7, From fad2f11dd56d998fad91ae141da561f2703a1c36 Mon Sep 17 00:00:00 2001 From: alejandro Date: Thu, 9 May 2024 01:08:53 +0000 Subject: [PATCH 17/24] fix(dspy): Adding missing LLMs supported in Snowflake and tag for session logs --- dsp/modules/snowflake.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflake.py index ef46ef2a28..d79bc1d47f 100644 --- a/dsp/modules/snowflake.py +++ b/dsp/modules/snowflake.py @@ -35,10 +35,10 @@ class Snowflake(LM): """Wrapper around Snowflake's CortexAPI. Currently supported models include 'snowflake-arctic','mistral-large','reka-flash','mixtral-8x7b', - 'llama2-70b-chat','mistral-7b','gemma-7b'. + 'llama2-70b-chat','mistral-7b','gemma-7b','llama3-8b','llama3-70b','reka-core'. 
""" - def __init__(self, model: str = "mistral-large", credentials=None, **kwargs): + def __init__(self, model: str = "mixtral-8x7b", credentials=None, **kwargs): """Parameters ---------- @@ -52,7 +52,8 @@ def __init__(self, model: str = "mistral-large", credentials=None, **kwargs): super().__init__(model) self.model = model - cortex_models = ["snowflake-arctic","mistral-large", "reka-flash", "mixtral-8x7b", "llama2-70b-chat", "mistral-7b", "gemma-7b"] + cortex_models = ["llama3-8b","llama3-70b","reka-core","snowflake-arctic","mistral-large", "reka-flash", "mixtral-8x7b", + "llama2-70b-chat", "mistral-7b", "gemma-7b"] if model in cortex_models: self.available_args = { @@ -80,7 +81,11 @@ def __init__(self, model: str = "mistral-large", credentials=None, **kwargs): @classmethod def _init_cortex(cls, credentials: dict) -> None: - return Session.builder.configs(credentials).create() + + session = Session.builder.configs(credentials).create() + session.query_tag = {"origin":"sf_sit", "name":"dspy", "version":{"major":1, "minor":0}} + + return session def _prepare_params( self, From 87756741ff47f962e237b4da3a42fee6463da364 Mon Sep 17 00:00:00 2001 From: alejandro Date: Thu, 9 May 2024 01:24:52 +0000 Subject: [PATCH 18/24] fix(dspy): adding language for recently released snowflake retriever model and updating snowflake session init to also include session log --- docs/api/retrieval_model_clients/SnowflakeRM.md | 2 +- dspy/retrieve/snowflake_rm.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/api/retrieval_model_clients/SnowflakeRM.md b/docs/api/retrieval_model_clients/SnowflakeRM.md index e3808673db..7b7a3ee5c2 100644 --- a/docs/api/retrieval_model_clients/SnowflakeRM.md +++ b/docs/api/retrieval_model_clients/SnowflakeRM.md @@ -6,7 +6,7 @@ sidebar_position: ### Constructor -Initialize an instance of the `SnowflakeRM` class, with the option to use `e5-base-v2` embeddings or any Snowflake Cortex supported embeddings model. 
+Initialize an instance of the `SnowflakeRM` class, with the option to use `e5-base-v2` or `snowflake-arctic-embed-m` embeddings or any other Snowflake Cortex supported embeddings model. ```python SnowflakeRM( diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index defa48ed2b..52a73bd6fa 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -42,7 +42,7 @@ def __init__( self.embeddings_field = embeddings_field self.embeddings_text_field = embeddings_text_field self.embeddings_model = embeddings_model - self.client = Session.builder.configs(snowflake_credentials).create() + self.client = self._init_cortex(credentials=snowflake_credentials) super().__init__(k=k) @@ -99,6 +99,14 @@ def _top_k_similar_chunks(self, query_embeddings, k): return top_k.select(doc_table_key).to_pandas().values + @classmethod + def _init_cortex(cls, credentials: dict) -> None: + + session = Session.builder.configs(credentials).create() + session.query_tag = {"origin":"sf_sit", "name":"dspy", "version":{"major":1, "minor":0}} + + return session + def _get_embeddings(self, query: str) -> list[float]: # create embeddings for the query embed = snow_fn.builtin("snowflake.cortex.embed_text") From a048a511f8e95b2916a802c55481bd4ce1001ae8 Mon Sep 17 00:00:00 2001 From: alejandro Date: Thu, 9 May 2024 20:36:42 +0000 Subject: [PATCH 19/24] fix(dspy): updating syntax for embeddings method call which will be deprecated --- docs/api/retrieval_model_clients/SnowflakeRM.md | 10 ++++++---- dspy/retrieve/snowflake_rm.py | 9 ++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/api/retrieval_model_clients/SnowflakeRM.md b/docs/api/retrieval_model_clients/SnowflakeRM.md index 7b7a3ee5c2..78695dc0c6 100644 --- a/docs/api/retrieval_model_clients/SnowflakeRM.md +++ b/docs/api/retrieval_model_clients/SnowflakeRM.md @@ -1,5 +1,5 @@ --- -sidebar_position: +sidebar_position: --- # retrieve.SnowflakeRM @@ -20,6 +20,7 @@ SnowflakeRM( ``` 
**Parameters:** + - `snowflake_table_name (str)`: The name of the Snowflake table containing embeddings. - `snowflake_credentials (dict)`: The connection parameters needed to initialize a Snowflake Snowpark Session. - `k (int, optional)`: The number of top passages to retrieve. Defaults to 3. @@ -34,10 +35,12 @@ SnowflakeRM( Search the Snowflake table for the top `k` passages matching the given query or queries, using embeddings generated via the default `e5-base-v2` model or the specified `embedding_model`. **Parameters:** + - `query_or_queries` (_Union[str, List[str]]_): The query or list of queries to search for. - `k` (_Optional[int]_, _optional_): The number of results to retrieve. If not specified, defaults to the value set during initialization. **Returns:** + - `dspy.Prediction`: Contains the retrieved passages, each represented as a `dotdict` with schema `[{"id": str, "score": float, "long_text": str, "metadatas": dict }]` ### Quickstart @@ -53,14 +56,14 @@ from dspy.retrieve.snowflake_rm import SnowflakeRM import os connection_parameters = { - + "account": os.getenv('SNOWFLAKE_ACCOUNT'), "user": os.getenv('SNOWFLAKE_USER'), "password": os.getenv('SNOWFLAKE_PASSWORD'), "role": os.getenv('SNOWFLAKE_ROLE'), "warehouse": os.getenv('SNOWFLAKE_WAREHOUSE'), "database": os.getenv('SNOWFLAKE_DATABASE'), - "schema": os.getenv('SNOWFLAKE_SCHEMA')} + "schema": os.getenv('SNOWFLAKE_SCHEMA')} retriever_model = SnowflakeRM( snowflake_table_name="", @@ -74,4 +77,3 @@ results = retriever_model("Explore the meaning of life", k=5) for result in results: print("Document:", result.long_text, "\n") ``` - diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index 52a73bd6fa..ec7dc389a4 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -11,7 +11,7 @@ except ImportError: raise ImportError( - "The snowflake-snowpark-python library is required to use SnowflakeRM. 
Install it with dspy-ai[snowflake]" + "The snowflake-snowpark-python library is required to use SnowflakeRM. Install it with dspy-ai[snowflake]", ) @@ -93,7 +93,7 @@ def _top_k_similar_chunks(self, query_embeddings, k): lit(query_embeddings).cast(VectorType(float, len(query_embeddings))), ).as_("dist"), ) - .sort("dist",ascending=False) + .sort("dist", ascending=False) .limit(k) ) @@ -101,15 +101,14 @@ def _top_k_similar_chunks(self, query_embeddings, k): @classmethod def _init_cortex(cls, credentials: dict) -> None: - session = Session.builder.configs(credentials).create() - session.query_tag = {"origin":"sf_sit", "name":"dspy", "version":{"major":1, "minor":0}} + session.query_tag = {"origin": "sf_sit", "name": "dspy", "version": {"major": 1, "minor": 0}} return session def _get_embeddings(self, query: str) -> list[float]: # create embeddings for the query - embed = snow_fn.builtin("snowflake.cortex.embed_text") + embed = snow_fn.builtin("snowflake.cortex.embed_text_768") cortex_embed_args = embed(snow_fn.lit(self.embeddings_model), snow_fn.lit(query)) return self.client.range(1).withColumn("complete_cal", cortex_embed_args).collect()[0].COMPLETE_CAL From b3af69cc0528c5e53c57f5ec8c7694b3f4da1f66 Mon Sep 17 00:00:00 2001 From: alejandro Date: Thu, 9 May 2024 20:44:11 +0000 Subject: [PATCH 20/24] fix(dspy): updating syntax for Snowflake cos similarity method which will be deprecated at the end of the month --- dspy/retrieve/snowflake_rm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index ec7dc389a4..fc8bd89527 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -6,7 +6,7 @@ try: from snowflake.snowpark import Session from snowflake.snowpark import functions as snow_fn - from snowflake.snowpark.functions import lit, vector_cosine_distance + from snowflake.snowpark.functions import lit, vector_cosine_similarity from snowflake.snowpark.types import 
VectorType except ImportError: @@ -88,7 +88,7 @@ def _top_k_similar_chunks(self, query_embeddings, k): doc_embeddings.select( doc_table_value, doc_table_key, - vector_cosine_distance( + vector_cosine_similarity( doc_embeddings.col(doc_table_value), lit(query_embeddings).cast(VectorType(float, len(query_embeddings))), ).as_("dist"), From 53630da5f5789c2a43090429062c6cf979202243 Mon Sep 17 00:00:00 2001 From: alejandro Date: Thu, 9 May 2024 20:58:30 +0000 Subject: [PATCH 21/24] fix(dspy): updating syntax for Snowflake cos similarity method which will be deprecated at the end of the month --- dspy/retrieve/snowflake_rm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index fc8bd89527..d9067bffae 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -6,7 +6,7 @@ try: from snowflake.snowpark import Session from snowflake.snowpark import functions as snow_fn - from snowflake.snowpark.functions import lit, vector_cosine_similarity + from snowflake.snowpark.functions import lit from snowflake.snowpark.types import VectorType except ImportError: From b0b18a7d7d19ea3663a1502faab5a4e3bc0afbee Mon Sep 17 00:00:00 2001 From: alejandro Date: Fri, 10 May 2024 01:33:42 +0000 Subject: [PATCH 22/24] fix(dspy): updating syntax for Snowflake cos similarity method which will be deprecated at the end of the month --- dspy/retrieve/snowflake_rm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index d9067bffae..47ee559e16 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -6,7 +6,7 @@ try: from snowflake.snowpark import Session from snowflake.snowpark import functions as snow_fn - from snowflake.snowpark.functions import lit + from snowflake.snowpark.functions import lit, function, col from snowflake.snowpark.types import VectorType except ImportError: @@ -84,11 
+84,13 @@ def _top_k_similar_chunks(self, query_embeddings, k): doc_table_key = self.embeddings_text_field doc_embeddings = self.client.table(self.snowflake_table_name) + cosine_similarity = function("vector_cosine_similarity") + top_k = ( doc_embeddings.select( doc_table_value, doc_table_key, - vector_cosine_similarity( + cosine_similarity( doc_embeddings.col(doc_table_value), lit(query_embeddings).cast(VectorType(float, len(query_embeddings))), ).as_("dist"), From 8dedd000d59c6c45cda037faf223eaa75c6bee86 Mon Sep 17 00:00:00 2001 From: alejandro Date: Fri, 10 May 2024 15:23:57 +0000 Subject: [PATCH 23/24] fix(dspy): solving for null response bug in Cortex API --- dsp/modules/snowflake.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflake.py index d79bc1d47f..851159936f 100644 --- a/dsp/modules/snowflake.py +++ b/dsp/modules/snowflake.py @@ -52,8 +52,18 @@ def __init__(self, model: str = "mixtral-8x7b", credentials=None, **kwargs): super().__init__(model) self.model = model - cortex_models = ["llama3-8b","llama3-70b","reka-core","snowflake-arctic","mistral-large", "reka-flash", "mixtral-8x7b", - "llama2-70b-chat", "mistral-7b", "gemma-7b"] + cortex_models = [ + "llama3-8b", + "llama3-70b", + "reka-core", + "snowflake-arctic", + "mistral-large", + "reka-flash", + "mixtral-8x7b", + "llama2-70b-chat", + "mistral-7b", + "gemma-7b", + ] if model in cortex_models: self.available_args = { @@ -81,9 +91,8 @@ def __init__(self, model: str = "mixtral-8x7b", credentials=None, **kwargs): @classmethod def _init_cortex(cls, credentials: dict) -> None: - session = Session.builder.configs(credentials).create() - session.query_tag = {"origin":"sf_sit", "name":"dspy", "version":{"major":1, "minor":0}} + session.query_tag = {"origin": "sf_sit", "name": "dspy", "version": {"major": 1, "minor": 0}} return session @@ -103,9 +112,13 @@ def _cortex_complete_request(self, prompt: str, **kwargs) -> 
dict: snow_func.lit([{"role": "user", "content": prompt}]), snow_func.lit(kwargs), ) - raw_response = self.client.range(1).withColumn("complete_cal", cortex_complete_args).collect()[0].COMPLETE_CAL + raw_response = self.client.range(1).withColumn("complete_cal", cortex_complete_args).collect() - return json.loads(raw_response) + if len(raw_response) > 0: + return json.loads(raw_response[0].COMPLETE_CAL) + + else: + return json.loads('{"choices": [{"messages": "None"}]}') def basic_request(self, prompt: str, **kwargs) -> list: raw_kwargs = kwargs From 23cd780897c908f61906069c810aae4ae0c89b9f Mon Sep 17 00:00:00 2001 From: alejandro Date: Sat, 11 May 2024 19:10:06 +0000 Subject: [PATCH 24/24] fix(dspy): adding LM connection parameter update to docs, resolving ruff issues and failing tests --- docs/api/language_model_clients/Snowflake.md | 13 + dsp/modules/snowflake.py | 3 + dspy/__init__.py | 1 - dspy/retrieve/snowflake_rm.py | 2 +- poetry.lock | 257 +++++++++++++++---- pyproject.toml | 2 +- 6 files changed, 228 insertions(+), 50 deletions(-) diff --git a/docs/api/language_model_clients/Snowflake.md b/docs/api/language_model_clients/Snowflake.md index b0eb5725fc..db1429858b 100644 --- a/docs/api/language_model_clients/Snowflake.md +++ b/docs/api/language_model_clients/Snowflake.md @@ -7,6 +7,19 @@ sidebar_position: ### Usage ```python +import dspy +import os + +connection_parameters = { + + "account": os.getenv('SNOWFLAKE_ACCOUNT'), + "user": os.getenv('SNOWFLAKE_USER'), + "password": os.getenv('SNOWFLAKE_PASSWORD'), + "role": os.getenv('SNOWFLAKE_ROLE'), + "warehouse": os.getenv('SNOWFLAKE_WAREHOUSE'), + "database": os.getenv('SNOWFLAKE_DATABASE'), + "schema": os.getenv('SNOWFLAKE_SCHEMA')} + lm = dspy.Snowflake(model="mixtral-8x7b",credentials=connection_parameters) ``` diff --git a/dsp/modules/snowflake.py b/dsp/modules/snowflake.py index 851159936f..09bffd787a 100644 --- a/dsp/modules/snowflake.py +++ b/dsp/modules/snowflake.py @@ -46,6 +46,9 @@ def 
__init__(self, model: str = "mixtral-8x7b", credentials=None, **kwargs): Which pre-trained model from Snowflake to use? Choices are 'snowflake-arctic','mistral-large','reka-flash','mixtral-8x7b','llama2-70b-chat','mistral-7b','gemma-7b' Full list of supported models is available here: https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions#complete + credentials: dict + Snowflake credentials required to initialize the session. + Full list of requirements can be found here: https://docs.snowflake.com/en/developer-guide/snowpark/reference/python/latest/api/snowflake.snowpark.Session **kwargs: dict Additional arguments to pass to the API provider. """ diff --git a/dspy/__init__.py b/dspy/__init__.py index 70bb35d26a..da659a2f45 100644 --- a/dspy/__init__.py +++ b/dspy/__init__.py @@ -7,7 +7,6 @@ from .signatures import * from .utils.logging import logger, set_log_output - # Functional must be imported after primitives, predict and signatures from .functional import * # isort: skip diff --git a/dspy/retrieve/snowflake_rm.py b/dspy/retrieve/snowflake_rm.py index 47ee559e16..40aac2b59f 100644 --- a/dspy/retrieve/snowflake_rm.py +++ b/dspy/retrieve/snowflake_rm.py @@ -6,7 +6,7 @@ try: from snowflake.snowpark import Session from snowflake.snowpark import functions as snow_fn - from snowflake.snowpark.functions import lit, function, col + from snowflake.snowpark.functions import col, function, lit from snowflake.snowpark.types import VectorType except ImportError: diff --git a/poetry.lock b/poetry.lock index e88f6b6f1e..7d9f6b8516 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aiohttp" @@ -201,7 +201,7 @@ trio = ["trio (>=0.23)"] name = "appnope" version = "0.1.4" description = "Disable App Nap on macOS >= 10.9" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"}, @@ -282,11 +282,22 @@ typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} [package.extras] tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] +[[package]] +name = "asn1crypto" +version = "1.5.1" +description = "Fast ASN.1 parser and serializer with definitions for private keys, public keys, certificates, CRL, OCSP, CMS, PKCS#3, PKCS#7, PKCS#8, PKCS#12, PKCS#5, X.509 and TSP" +optional = true +python-versions = "*" +files = [ + {file = "asn1crypto-1.5.1-py2.py3-none-any.whl", hash = "sha256:db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67"}, + {file = "asn1crypto-1.5.1.tar.gz", hash = "sha256:13ae38502be632115abf8a24cbe5f4da52e3b5231990aff31123c805306ccb9c"}, +] + [[package]] name = "asttokens" version = "2.4.1" description = "Annotate AST trees with source code positions" -optional = false +optional = true python-versions = "*" files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, @@ -640,7 +651,7 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, @@ -902,6 +913,17 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "cloudpickle" +version = "2.2.1" +description = "Extended pickling support for Python objects" +optional = true +python-versions = ">=3.6" +files = [ + {file = "cloudpickle-2.2.1-py3-none-any.whl", hash = "sha256:61f594d1f4c295fa5cd9014ceb3a1fc4a70b0de1164b94fbc2d854ccba056f9f"}, + {file = "cloudpickle-2.2.1.tar.gz", hash = "sha256:d89684b8de9e34a2a43b3460fbca07d09d6e25ce858df4d5a44240403b6178f5"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -951,7 +973,7 @@ development = ["black", "flake8", "mypy", "pytest", "types-colorama"] name = "comm" version = "0.2.2" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." 
-optional = false +optional = true python-versions = ">=3.8" files = [ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, @@ -1066,7 +1088,7 @@ vision = ["Pillow (>=6.2.1)"] name = "debugpy" version = "1.8.1" description = "An implementation of the Debug Adapter Protocol for Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "debugpy-1.8.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741"}, @@ -1097,7 +1119,7 @@ files = [ name = "decorator" version = "5.1.1" description = "Decorators for Humans" -optional = false +optional = true python-versions = ">=3.5" files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, @@ -1243,7 +1265,7 @@ test = ["pytest (>=6)"] name = "executing" version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" -optional = false +optional = true python-versions = ">=3.5" files = [ {file = "executing-2.0.1-py2.py3-none-any.whl", hash = "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc"}, @@ -2102,7 +2124,7 @@ files = [ name = "ipykernel" version = "6.29.4" description = "IPython Kernel for Jupyter" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "ipykernel-6.29.4-py3-none-any.whl", hash = "sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da"}, @@ -2135,7 +2157,7 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio name = "ipython" version = "8.18.1" description = "IPython: Productive Interactive Computing" -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, @@ -2186,7 +2208,7 @@ six = "*" name = "jedi" 
version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, @@ -2306,7 +2328,7 @@ testing = ["coverage", "ipykernel", "jupytext", "matplotlib", "nbdime", "nbforma name = "jupyter-client" version = "8.6.1" description = "Jupyter protocol implementation and client libraries" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter_client-8.6.1-py3-none-any.whl", hash = "sha256:3b7bd22f058434e3b9a7ea4b1500ed47de2713872288c0d511d19926f99b459f"}, @@ -2329,7 +2351,7 @@ test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pyt name = "jupyter-core" version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, @@ -2574,7 +2596,7 @@ name = "matplotlib-inline" version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" optional = true -python-versions = ">=3.5" +python-versions = ">=3.8" files = [ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, @@ -3213,7 +3235,7 @@ test = ["pep440", "pre-commit", "pytest", "testpath"] name = "nest-asyncio" version = "1.6.0" description = "Patch asyncio to allow nested event loops" -optional = false +optional = true python-versions = ">=3.5" files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, @@ -3920,7 
+3942,7 @@ xml = ["lxml (>=4.9.2)"] name = "parso" version = "0.8.4" description = "A Python Parser" -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, @@ -3946,7 +3968,7 @@ files = [ name = "pexpect" version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." -optional = false +optional = true python-versions = "*" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, @@ -4089,7 +4111,7 @@ virtualenv = ">=20.10.0" name = "prompt-toolkit" version = "3.0.43" description = "Library for building powerful interactive command lines in Python" -optional = false +optional = true python-versions = ">=3.7.0" files = [ {file = "prompt_toolkit-3.0.43-py3-none-any.whl", hash = "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6"}, @@ -4123,7 +4145,7 @@ files = [ name = "psutil" version = "5.9.8" description = "Cross-platform lib for process and system monitoring in Python." 
-optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"}, @@ -4173,7 +4195,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -optional = false +optional = true python-versions = "*" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, @@ -4231,7 +4253,7 @@ functions = ["apache-bookkeeper-client (>=4.16.1)", "grpcio (>=1.60.0)", "promet name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" -optional = false +optional = true python-versions = "*" files = [ {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, @@ -4341,7 +4363,7 @@ name = "pycparser" version = "2.22" description = "C parser in Python" optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.8" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -4531,6 +4553,23 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pyjwt" +version = "2.8.0" +description = "JSON Web Token implementation in Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[package.extras] +crypto = ["cryptography (>=3.4.0)"] +dev = 
["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] +tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] + [[package]] name = "pymdown-extensions" version = "10.8.1" @@ -4577,6 +4616,24 @@ ujson = ">=2.0.0" model = ["milvus-model (>=0.1.0)"] test = ["black", "grpcio-testing", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>=0.3.3)"] +[[package]] +name = "pyopenssl" +version = "24.1.0" +description = "Python wrapper module around the OpenSSL library" +optional = true +python-versions = ">=3.7" +files = [ + {file = "pyOpenSSL-24.1.0-py3-none-any.whl", hash = "sha256:17ed5be5936449c5418d1cd269a1a9e9081bc54c17aed272b45856a3d3dc86ad"}, + {file = "pyOpenSSL-24.1.0.tar.gz", hash = "sha256:cabed4bfaa5df9f1a16c0ef64a0cb65318b5cd077a7eda7d6970131ca2f41a6f"}, +] + +[package.dependencies] +cryptography = ">=41.0.5,<43" + +[package.extras] +docs = ["sphinx (!=5.2.0,!=5.2.0.post0,!=7.2.5)", "sphinx-rtd-theme"] +test = ["pretend", "pytest (>=3.0.1)", "pytest-rerunfailures"] + [[package]] name = "pyparsing" version = "3.1.2" @@ -4721,7 +4778,7 @@ files = [ name = "pywin32" version = "306" description = "Python for Window Extensions" -optional = false +optional = true python-versions = "*" files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, @@ -4819,7 +4876,7 @@ name = "pyzmq" version = "26.0.3" description = "Python bindings for 0MQ" optional = true -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:44dd6fc3034f1eaa72ece33588867df9e006a7303725a12d64c3dff92330f625"}, {file = "pyzmq-26.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:acb704195a71ac5ea5ecf2811c9ee19ecdc62b91878528302dd0be1b9451cc90"}, @@ -5503,6 +5560,105 @@ files = [ {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, ] +[[package]] +name = "snowflake-connector-python" +version = "3.10.0" +description = "Snowflake Connector for Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "snowflake_connector_python-3.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e2afca4bca70016519d1a7317c498f1d9c56140bf3e40ea40bddcc95fe827ca"}, + {file = "snowflake_connector_python-3.10.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:d19bde29f89b226eb22af4c83134ecb5c229da1d5e960a01b8f495df78dcdc36"}, + {file = "snowflake_connector_python-3.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bfe013ed97b4dd2e191fd6770a14030d29dd0108817d6ce76b9773250dd2d560"}, + {file = "snowflake_connector_python-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0917c9f9382d830907e1a18ee1208537b203618700a9c671c2a20167b30f574"}, + {file = "snowflake_connector_python-3.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:7e828bc99240433e6552ac4cc4e37f223ae5c51c7880458ddb281668503c7491"}, + {file = "snowflake_connector_python-3.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a0d3d06d758455c50b998eabc1fd972a1f67faa5c85ef250fd5986f5a41aab0b"}, + {file = "snowflake_connector_python-3.10.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:4602cb19b204bb03e03d65c6d5328467c9efc0fec53ca56768c3747c8dc8a70f"}, + {file = "snowflake_connector_python-3.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb1a04b496bbd3e1e2e926df82b2369887b2eea958f535fb934c240bfbabf6c5"}, + {file = "snowflake_connector_python-3.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c889f9f60f915d657e0a0ad2e6cc52cdcafd9bcbfa95a095aadfd8bcae62b819"}, + {file = 
"snowflake_connector_python-3.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:8e441484216ed416a6ed338133e23bd991ac4ba2e46531f4d330f61568c49314"}, + {file = "snowflake_connector_python-3.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bb4aced19053c67513cecc92311fa9d3b507b2277698c8e987d404f6f3a49fb2"}, + {file = "snowflake_connector_python-3.10.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:858315a2feff86213b079c6293ad8d850a778044c664686802ead8bb1337e1bc"}, + {file = "snowflake_connector_python-3.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:adf16e1ca9f46d3bdf68e955ffa42075ebdb251e3b13b59003d04e4fea7d579a"}, + {file = "snowflake_connector_python-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4c5c2a08b39086a5348502652ad4fdf24871d7ab30fd59f6b7b57249158468c"}, + {file = "snowflake_connector_python-3.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:05011286f42c52eb3e5a6db59ee3eaf79f3039f3a19d7ffac6f4ee143779c637"}, + {file = "snowflake_connector_python-3.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:569301289ada5b0d72d0bd8432b7ca180220335faa6d9a0f7185f60891db6f2c"}, + {file = "snowflake_connector_python-3.10.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:4e5641c70a12da9804b74f350b8cbbdffdc7aca5069b096755abd2a1fdcf5d1b"}, + {file = "snowflake_connector_python-3.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12ff767a1b8c48431549ac28884f8bd9647e63a23f470b05f6ab8d143c4b1475"}, + {file = "snowflake_connector_python-3.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e52bbc1e2e7bda956525b4229d7f87579f8cabd7d5506b12aa754c4bcdc8c8d7"}, + {file = "snowflake_connector_python-3.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:280a8dcca0249e864419564e38764c08f8841900d9872fec2f2855fda494b29f"}, + {file = "snowflake_connector_python-3.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:67bf570230b0cf818e6766c17245c7355a1f5ea27778e54ab8d09e5bb3536ad9"}, + {file = "snowflake_connector_python-3.10.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:aa1e26f9c571d2c4206da5c978c1b345ffd798d3db1f9ae91985e6243c6bf94b"}, + {file = "snowflake_connector_python-3.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73e9baa531d5156a03bfe5af462cf6193ec2a01cbb575edf7a2dd3b2a35254c7"}, + {file = "snowflake_connector_python-3.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e03361c4749e4d65bf0d223fdea1c2d7a33af53b74e873929a6085d150aff17e"}, + {file = "snowflake_connector_python-3.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:e8cddd4357e70ab55d7aeeed144cbbeb1ff658b563d7d8d307afc06178a367ec"}, + {file = "snowflake_connector_python-3.10.0.tar.gz", hash = "sha256:7c7438e958753bd1174b73581d77c92b0b47a86c38d8ea0ba1ea23c442eb8e75"}, +] + +[package.dependencies] +asn1crypto = ">0.24.0,<2.0.0" +certifi = ">=2017.4.17" +cffi = ">=1.9,<2.0.0" +charset-normalizer = ">=2,<4" +cryptography = ">=3.1.0,<43.0.0" +filelock = ">=3.5,<4" +idna = ">=2.5,<4" +packaging = "*" +platformdirs = ">=2.6.0,<5.0.0" +pyjwt = "<3.0.0" +pyOpenSSL = ">=16.2.0,<25.0.0" +pytz = "*" +requests = "<3.0.0" +sortedcontainers = ">=2.4.0" +tomlkit = "*" +typing-extensions = ">=4.3,<5" +urllib3 = {version = ">=1.21.1,<2.0.0", markers = "python_version < \"3.10\""} + +[package.extras] +development = ["Cython", "coverage", "more-itertools", "numpy (<1.27.0)", "pendulum (!=2.1.1)", "pexpect", "pytest (<7.5.0)", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist", "pytzdata"] +pandas = ["pandas (>=1.0.0,<3.0.0)", "pyarrow"] +secure-local-storage = ["keyring (>=23.1.0,<25.0.0)"] + +[[package]] +name = "snowflake-snowpark-python" +version = "1.16.0" +description = "Snowflake Snowpark for Python" +optional = true +python-versions = "<3.12,>=3.8" +files = [ + {file = "snowflake_snowpark_python-1.16.0-py3-none-any.whl", hash = 
"sha256:3b3713235644bfa463f41a72e368e0007667c4efb91d770e9a5681164e495aee"}, + {file = "snowflake_snowpark_python-1.16.0.tar.gz", hash = "sha256:b6c25fa37878f250ee8dca40c83bf556bc6d983be85818fd0767fcee893f9112"}, +] + +[package.dependencies] +cloudpickle = [ + {version = ">=1.6.0,<2.1.0 || >2.1.0,<2.2.0 || >2.2.0,<=2.2.1", markers = "python_version < \"3.11\""}, + {version = "2.2.1", markers = "python_version ~= \"3.11\""}, +] +pyyaml = "*" +setuptools = ">=40.6.0" +snowflake-connector-python = ">=3.10.0,<4.0.0" +typing-extensions = ">=4.1.0,<5.0.0" +wheel = "*" + +[package.extras] +development = ["cachetools", "coverage", "pre-commit", "pytest (<8.0.0)", "pytest-cov", "pytest-timeout", "sphinx (==5.0.2)"] +localtest = ["pandas", "pyarrow", "requests"] +opentelemetry = ["opentelemetry-api (>=1.0.0,<2.0.0)", "opentelemetry-sdk (>=1.0.0,<2.0.0)"] +pandas = ["snowflake-connector-python[pandas] (>=3.10.0,<4.0.0)"] +secure-local-storage = ["snowflake-connector-python[secure-local-storage] (>=3.10.0,<4.0.0)"] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = true +python-versions = "*" +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + [[package]] name = "soupsieve" version = "2.5" @@ -5838,7 +5994,7 @@ sqlcipher = ["sqlcipher3_binary"] name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -optional = false +optional = true python-versions = "*" files = [ {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, @@ -6092,6 +6248,17 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = 
"sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tomlkit" +version = "0.12.5" +description = "Style preserving TOML library" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"}, + {file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"}, +] + [[package]] name = "torch" version = "2.3.0" @@ -6150,7 +6317,7 @@ optree = ["optree (>=0.9.1)"] name = "tornado" version = "6.4" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -optional = false +optional = true python-versions = ">= 3.8" files = [ {file = "tornado-6.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:02ccefc7d8211e5a7f9e8bc3f9e5b0ad6262ba2fbb683a6443ecc804e5224ce0"}, @@ -6190,7 +6357,7 @@ telegram = ["requests"] name = "traitlets" version = "5.14.3" description = "Traitlets Python configuration system" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, @@ -6421,23 +6588,6 @@ brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotl secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "urllib3" -version = "2.2.1" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, - {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - [[package]] name = "uvicorn" version = "0.29.0" @@ -6685,7 +6835,7 @@ anyio = ">=3.0.0" name = "wcwidth" version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" -optional = false +optional = true python-versions = "*" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, @@ -6810,6 +6960,20 @@ files = [ {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"}, ] +[[package]] +name = "wheel" +version = "0.43.0" +description = "A built-package format for Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "wheel-0.43.0-py3-none-any.whl", hash = "sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81"}, + {file = "wheel-0.43.0.tar.gz", hash = "sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85"}, +] + +[package.extras] +test = ["pytest (>=6.0.0)", "setuptools (>=65)"] + [[package]] name = "win32-setctime" version = "1.1.0" @@ -7153,5 +7317,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "dfb44251298e064041c90ee3a63a1cf5baaf2c6ce5c4bbaa4a036247c74852a8" - +content-hash = "d5fc4db9e32e22b358c93b3e026dbecbfd1f32fc4c4106112ba95e37c0aa259d" diff --git a/pyproject.toml b/pyproject.toml index 0b988e7ef9..7557fdce05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,7 @@ rich = 
"^13.7.1" psycopg2 = { version = "^2.9.9", optional = true } pgvector = { version = "^0.2.5", optional = true } structlog = "^24.1.0" -snowflake-snowpark-python = { version = "*",optional=true} +snowflake-snowpark-python = { version = "*",optional=true, python = ">=3.9,<3.12" } jinja2 = "^3.1.3"