From c3236cbe1e9d09397cfbe5afea1eb4d01d9e40e8 Mon Sep 17 00:00:00 2001 From: Brian Sam-Bodden Date: Thu, 29 Feb 2024 12:33:35 -0700 Subject: [PATCH 1/5] Fix vectorizer tests conditional run logic --- tests/integration/test_vectorizers.py | 46 ++++++++++++++++----------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 9a3c1288..fbf62c8d 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -9,7 +9,6 @@ VertexAITextVectorizer, ) - @pytest.fixture def skip_vectorizer() -> bool: # os.getenv returns a string @@ -17,10 +16,6 @@ def skip_vectorizer() -> bool: print(v, flush=True) return v - -skip_vectorizer_test = lambda: pytest.config.getfixturevalue("skip_vectorizer") - - @pytest.fixture( params=[ HFTextVectorizer, @@ -29,7 +24,10 @@ def skip_vectorizer() -> bool: CohereTextVectorizer, ] ) -def vectorizer(request): +def vectorizer(request, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer instantiation...") + if request.param == HFTextVectorizer: return request.param() elif request.param == OpenAITextVectorizer: @@ -40,8 +38,10 @@ def vectorizer(request): return request.param() -@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests") -def test_vectorizer_embed(vectorizer): +def test_vectorizer_embed(vectorizer, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer tests") + text = "This is a test sentence." 
if isinstance(vectorizer, CohereTextVectorizer): embedding = vectorizer.embed(text, input_type="search_document") @@ -52,8 +52,10 @@ def test_vectorizer_embed(vectorizer): assert len(embedding) == vectorizer.dims -@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests") -def test_vectorizer_embed_many(vectorizer): +def test_vectorizer_embed_many(vectorizer, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer tests") + texts = ["This is the first test sentence.", "This is the second test sentence."] if isinstance(vectorizer, CohereTextVectorizer): embeddings = vectorizer.embed_many(texts, input_type="search_document") @@ -67,8 +69,10 @@ def test_vectorizer_embed_many(vectorizer): ) -@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests") -def test_vectorizer_bad_input(vectorizer): +def test_vectorizer_bad_input(vectorizer, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer tests") + with pytest.raises(TypeError): vectorizer.embed(1) @@ -86,9 +90,11 @@ def avectorizer(request): return request.param() -@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests") @pytest.mark.asyncio -async def test_vectorizer_aembed(avectorizer): +async def test_vectorizer_aembed(avectorizer, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer tests") + text = "This is a test sentence." 
embedding = await avectorizer.aembed(text) @@ -96,9 +102,11 @@ async def test_vectorizer_aembed(avectorizer): assert len(embedding) == avectorizer.dims -@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests") @pytest.mark.asyncio -async def test_vectorizer_aembed_many(avectorizer): +async def test_vectorizer_aembed_many(avectorizer, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer tests") + texts = ["This is the first test sentence.", "This is the second test sentence."] embeddings = await avectorizer.aembed_many(texts) @@ -109,9 +117,11 @@ async def test_vectorizer_aembed_many(avectorizer): ) -@pytest.mark.skipif(skip_vectorizer_test, reason="Skipping vectorizer tests") @pytest.mark.asyncio -async def test_avectorizer_bad_input(avectorizer): +async def test_avectorizer_bad_input(avectorizer, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer tests") + with pytest.raises(TypeError): avectorizer.embed(1) From aeed0b5f439b7a8d3823a12bac50924ff6395632 Mon Sep 17 00:00:00 2001 From: Brian Sam-Bodden Date: Thu, 29 Feb 2024 12:37:00 -0700 Subject: [PATCH 2/5] Update to OpenAI client v1.13.3 and migrate code accordingly (#125) --- docs/user_guide/llmcache_03.ipynb | 10 ++++--- redisvl/utils/vectorize/text/openai.py | 40 ++++++++++++++------------ requirements-all.txt | 2 +- tests/integration/test_vectorizers.py | 5 +++- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/docs/user_guide/llmcache_03.ipynb b/docs/user_guide/llmcache_03.ipynb index 0adf1c99..4646ddd4 100644 --- a/docs/user_guide/llmcache_03.ipynb +++ b/docs/user_guide/llmcache_03.ipynb @@ -25,7 +25,7 @@ "outputs": [], "source": [ "import os\n", - "import openai\n", + "from openai import OpenAI\n", "import getpass\n", "import time\n", "\n", @@ -35,11 +35,13 @@ "\n", "api_key = os.getenv(\"OPENAI_API_KEY\") or getpass.getpass(\"Enter your OpenAI API key: \")\n", "\n", - "openai.api_key = api_key\n", + "client = OpenAI(\n", + " 
api_key=os.environ['OPENAI_API_KEY'], # this is also the default, it can be omitted\n", + ")\n", "\n", "def ask_openai(question: str) -> str:\n", - " response = openai.Completion.create(\n", - " engine=\"gpt-3.5-turbo-instruct\",\n", + " response = client.completions.create(\n", + " model=\"gpt-3.5-turbo-instruct\",\n", " prompt=question,\n", " max_tokens=200\n", " )\n", diff --git a/redisvl/utils/vectorize/text/openai.py b/redisvl/utils/vectorize/text/openai.py index 1aa32ebe..1e0f429f 100644 --- a/redisvl/utils/vectorize/text/openai.py +++ b/redisvl/utils/vectorize/text/openai.py @@ -1,5 +1,5 @@ import os -from typing import Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional from tenacity import retry, stop_after_attempt, wait_random_exponential from tenacity.retry import retry_if_not_exception_type @@ -19,7 +19,7 @@ class OpenAITextVectorizer(BaseVectorizer): in the `api_config` dictionary or through the `OPENAI_API_KEY` environment variable. Users must obtain an API key from OpenAI's website (https://api.openai.com/). Additionally, the `openai` python client must be - installed with `pip install openai==0.28.1`. + installed with `pip install openai==1.13.3`. The vectorizer supports both synchronous and asynchronous operations, allowing for batch processing of texts and flexibility in handling @@ -42,6 +42,8 @@ class OpenAITextVectorizer(BaseVectorizer): """ + aclient: Any # Since the OpenAI module is loaded dynamically + def __init__( self, model: str = "text-embedding-ada-002", api_config: Optional[Dict] = None ): @@ -59,7 +61,7 @@ def __init__( """ # Dynamic import of the openai module try: - import openai + from openai import AsyncOpenAI, OpenAI except ImportError: raise ImportError( "OpenAI vectorizer requires the openai library. \ @@ -77,17 +79,19 @@ def __init__( environment variable." 
) - openai.api_key = api_key - client = openai.Embedding + client = OpenAI(api_key=api_key) dims = self._set_model_dims(client, model) super().__init__(model=model, dims=dims, client=client) + self.aclient = AsyncOpenAI(api_key=api_key) @staticmethod def _set_model_dims(client, model) -> int: try: - embedding = client.create(input=["dimension test"], engine=model)["data"][ - 0 - ]["embedding"] + embedding = ( + client.embeddings.create(input=["dimension test"], model=model) + .data[0] + .embedding + ) except (KeyError, IndexError) as ke: raise ValueError(f"Unexpected response from the OpenAI API: {str(ke)}") except Exception as e: # pylint: disable=broad-except @@ -132,10 +136,9 @@ def embed_many( embeddings: List = [] for batch in self.batchify(texts, batch_size, preprocess): - response = self.client.create(input=batch, engine=self.model) + response = self.client.embeddings.create(input=batch, model=self.model) embeddings += [ - self._process_embedding(r["embedding"], as_buffer) - for r in response["data"] + self._process_embedding(r.embedding, as_buffer) for r in response.data ] return embeddings @@ -171,8 +174,8 @@ def embed( if preprocess: text = preprocess(text) - result = self.client.create(input=[text], engine=self.model) - return self._process_embedding(result["data"][0]["embedding"], as_buffer) + result = self.client.embeddings.create(input=[text], model=self.model) + return self._process_embedding(result.data[0].embedding, as_buffer) @retry( wait=wait_random_exponential(min=1, max=60), @@ -211,10 +214,11 @@ async def aembed_many( embeddings: List = [] for batch in self.batchify(texts, batch_size, preprocess): - response = await self.client.acreate(input=batch, engine=self.model) + response = await self.aclient.embeddings.create( + input=batch, model=self.model + ) embeddings += [ - self._process_embedding(r["embedding"], as_buffer) - for r in response["data"] + self._process_embedding(r.embedding, as_buffer) for r in response.data ] return embeddings @@ 
-250,5 +254,5 @@ async def aembed( if preprocess: text = preprocess(text) - result = await self.client.acreate(input=[text], engine=self.model) - return self._process_embedding(result["data"][0]["embedding"], as_buffer) + result = await self.aclient.embeddings.create(input=[text], model=self.model) + return self._process_embedding(result.data[0].embedding, as_buffer) diff --git a/requirements-all.txt b/requirements-all.txt index a707d77a..30ec49d4 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -1,4 +1,4 @@ -openai<=0.28.1 +openai>=1.13.3 sentence-transformers>=2.2.2 google-cloud-aiplatform>=1.26 cohere>=4.44 \ No newline at end of file diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index fbf62c8d..c15e25f0 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -84,7 +84,10 @@ def test_vectorizer_bad_input(vectorizer, skip_vectorizer): @pytest.fixture(params=[OpenAITextVectorizer]) -def avectorizer(request): +def avectorizer(request, skip_vectorizer): + if skip_vectorizer: + pytest.skip("Skipping vectorizer instantiation...") + # Here we use actual models for integration test if request.param == OpenAITextVectorizer: return request.param() From d5c5c28296ca44df4442e59529ea845994e5d783 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 1 Mar 2024 09:00:20 -0500 Subject: [PATCH 3/5] use the api_key --- docs/user_guide/llmcache_03.ipynb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/user_guide/llmcache_03.ipynb b/docs/user_guide/llmcache_03.ipynb index 4646ddd4..9fb2a907 100644 --- a/docs/user_guide/llmcache_03.ipynb +++ b/docs/user_guide/llmcache_03.ipynb @@ -35,9 +35,7 @@ "\n", "api_key = os.getenv(\"OPENAI_API_KEY\") or getpass.getpass(\"Enter your OpenAI API key: \")\n", "\n", - "client = OpenAI(\n", - " api_key=os.environ['OPENAI_API_KEY'], # this is also the default, it can be omitted\n", - ")\n", + "client = 
OpenAI(api_key=api_key)\n", "\n", "def ask_openai(question: str) -> str:\n", " response = client.completions.create(\n", From 2f3684ace03b8721be31c3c92cc5b4b69228dafe Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 1 Mar 2024 09:02:27 -0500 Subject: [PATCH 4/5] pin to 1.13 and up --- redisvl/utils/vectorize/text/openai.py | 2 +- requirements-all.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/redisvl/utils/vectorize/text/openai.py b/redisvl/utils/vectorize/text/openai.py index 1e0f429f..22afb2f5 100644 --- a/redisvl/utils/vectorize/text/openai.py +++ b/redisvl/utils/vectorize/text/openai.py @@ -19,7 +19,7 @@ class OpenAITextVectorizer(BaseVectorizer): in the `api_config` dictionary or through the `OPENAI_API_KEY` environment variable. Users must obtain an API key from OpenAI's website (https://api.openai.com/). Additionally, the `openai` python client must be - installed with `pip install openai==1.13.3`. + installed with `pip install "openai>=1.13.0"`. 
The vectorizer supports both synchronous and asynchronous operations, allowing for batch processing of texts and flexibility in handling diff --git a/requirements-all.txt b/requirements-all.txt index 30ec49d4..5888a106 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -1,4 +1,4 @@ -openai>=1.13.3 +openai>=1.13.0 sentence-transformers>=2.2.2 google-cloud-aiplatform>=1.26 cohere>=4.44 \ No newline at end of file From fcaa229bfe49411383cf21755aaa0425e6677e3a Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 1 Mar 2024 09:05:50 -0500 Subject: [PATCH 5/5] Formatting --- tests/integration/test_vectorizers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index c15e25f0..c8766171 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -9,6 +9,7 @@ VertexAITextVectorizer, ) + @pytest.fixture def skip_vectorizer() -> bool: # os.getenv returns a string @@ -16,6 +17,7 @@ def skip_vectorizer() -> bool: print(v, flush=True) return v + @pytest.fixture( params=[ HFTextVectorizer,