Version 11.6 diff (#66)
* Hallacy/pickleable exceptions (#109)

* Exceptions in Python are hard to pickle; make exceptions pickleable (see the sketch after this list)

* Remove debug

* Added tests

* nit

* Change embeddings call to openai.Embedding.create (#110)

* Change embeddings call to openai.Embedding.create

* And update model names

* Untweak notebook

* Support encoded embeddings response (#111)

* Support encoded embeddings response

* Removed imports

* Add a couple of comments

* Typo

* Remove CA bundle (#112)

* Remove CA bundle

* Removed some references
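To illustrate the pickling fix described in the first bullet, here is a minimal sketch using toy exception classes (these are not the library's actual error types): an exception whose constructor takes extra required arguments breaks on unpickling, and defining `__reduce__` fixes it.

```python
import pickle


class BrokenError(Exception):
    """Toy example: an extra required argument that pickle does not know about."""

    def __init__(self, message, param):
        super().__init__(message)  # only `message` ends up in self.args
        self.param = param


class PicklableError(Exception):
    """Same shape, but tells pickle how to rebuild itself."""

    def __init__(self, message, param):
        super().__init__(message)
        self.param = param

    def __reduce__(self):
        # Rebuild by calling the class with both constructor arguments.
        return type(self), (self.args[0], self.param)


try:
    pickle.loads(pickle.dumps(BrokenError("bad request", "engine")))
except TypeError as exc:
    print("round-trip failed:", exc)  # __init__() is missing the `param` argument

restored = pickle.loads(pickle.dumps(PicklableError("bad request", "engine")))
print(restored.param)  # -> engine
```

The same idea applies to any error class whose constructor takes more than just a message.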
hallacy committed Jan 21, 2022
1 parent 7ea5dde commit f4be8f2
Showing 13 changed files with 136 additions and 3,255 deletions.
README.md: 4 changes (2 additions, 2 deletions)
@@ -84,10 +84,10 @@ openai.api_key = "sk-..." # supply your API key however you choose
text_string = "sample text"

# choose an embedding
-model_id = "davinci-similarity"
+model_id = "text-similarity-davinci-001"

# compute the embedding of the text
-embedding = openai.Engine(id=model_id).embeddings(input=text_string)['data'][0]['embedding']
+embedding = openai.Embedding.create(input=text_string, engine=model_id)['data'][0]['embedding']
```

An example of how to call the embeddings method is shown in the [get embeddings notebook](https://github.com/openai/openai-python/blob/main/examples/embeddings/Get_embeddings.ipynb).
examples/embeddings/Get_embeddings.ipynb: 11 changes (6 additions, 5 deletions)
@@ -28,7 +28,7 @@
"source": [
"import openai\n",
"\n",
-"embedding = openai.Engine(id=\"davinci-similarity\").embeddings(input=\"Sample document text goes here\")['data'][0]['embedding']\n",
+"embedding = openai.Embedding.create(input=\"Sample document text goes here\", engine=\"text-similarity-davinci-001\")['data'][0]['embedding']\n",
"len(embedding)"
]
},
@@ -50,14 +50,15 @@
"from tenacity import retry, wait_random_exponential, stop_after_attempt\n",
"\n",
"@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))\n",
-"def get_embedding(text, engine=\"davinci-similarity\"):\n",
+"def get_embedding(text, engine=\"text-similarity-davinci-001\"):\n",
"\n",
"\n",
"    # replace newlines, which can negatively affect performance.\n",
"    text = text.replace(\"\\n\", \" \")\n",
"\n",
-"    return openai.Engine(id=engine).embeddings(input = [text])['data'][0]['embedding']\n",
+"    return openai.Embedding.create(input=[text], engine=engine)['data'][0]['embedding']\n",
"\n",
-"embedding = get_embedding(\"Sample query text goes here\", engine=\"ada-search-query\")\n",
+"embedding = get_embedding(\"Sample query text goes here\", engine=\"text-search-ada-query-001\")\n",
"print(len(embedding))"
]
},
@@ -75,7 +76,7 @@
}
],
"source": [
-"embedding = get_embedding(\"Sample document text goes here\", engine=\"ada-search-document\")\n",
+"embedding = get_embedding(\"Sample document text goes here\", engine=\"text-search-ada-doc-001\")\n",
"print(len(embedding))"
]
}
openai/__init__.py: 4 changes (3 additions, 1 deletion)
@@ -9,6 +9,7 @@
    Answer,
    Classification,
    Completion,
+    Embedding,
    Engine,
    ErrorObject,
    File,
@@ -31,7 +32,7 @@
proxy = None
app_info = None
enable_telemetry = False # Ignored; the telemetry feature was removed.
-ca_bundle_path = os.path.join(os.path.dirname(__file__), "data/ca-certificates.crt")
+ca_bundle_path = None # No longer used, feature was removed
debug = False
log = None # Set to either 'debug' or 'info', controls console logging

@@ -40,6 +41,7 @@
    "Answer",
    "Classification",
    "Completion",
+    "Embedding",
    "Engine",
    "ErrorObject",
    "File",
openai/api_requestor.py: 1 change (0 additions, 1 deletion)
@@ -49,7 +49,6 @@ def _make_session() -> requests.Session:
    proxies = _requests_proxies_arg(openai.proxy)
    if proxies:
        s.proxies = proxies
-    s.verify = openai.ca_bundle_path
    s.mount(
        "https://",
        requests.adapters.HTTPAdapter(max_retries=MAX_CONNECTION_RETRIES),
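With the bundled CA file removed, `requests` falls back to its default certificate verification. If a custom bundle is still needed, one possible workaround (an assumption about your environment, not something this commit provides) is the standard `requests` environment variable:

```python
# Hypothetical workaround, not part of this commit: point `requests` (which the
# client uses for HTTP) at your own CA bundle via its standard environment variable.
import os

os.environ["REQUESTS_CA_BUNDLE"] = "/path/to/your/ca-bundle.crt"  # hypothetical path
```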
openai/api_resources/__init__.py: 1 change (1 addition, 0 deletions)
@@ -1,6 +1,7 @@
from openai.api_resources.answer import Answer # noqa: F401
from openai.api_resources.classification import Classification # noqa: F401
from openai.api_resources.completion import Completion # noqa: F401
+from openai.api_resources.embedding import Embedding # noqa: F401
from openai.api_resources.engine import Engine # noqa: F401
from openai.api_resources.error_object import ErrorObject # noqa: F401
from openai.api_resources.file import File # noqa: F401
openai/api_resources/embedding.py: 58 changes (58 additions, 0 deletions)
@@ -0,0 +1,58 @@
import base64
import time

import numpy as np

from openai import util
from openai.api_resources.abstract import DeletableAPIResource, ListableAPIResource
from openai.api_resources.abstract.engine_api_resource import EngineAPIResource
from openai.error import InvalidRequestError, TryAgain


class Embedding(EngineAPIResource, ListableAPIResource, DeletableAPIResource):
    engine_required = True
    OBJECT_NAME = "embedding"

    @classmethod
    def create(cls, *args, **kwargs):
        """
        Creates a new embedding for the provided input and parameters.
        See https://beta.openai.com/docs/api-reference/embeddings for a list
        of valid parameters.
        """
        start = time.time()
        timeout = kwargs.pop("timeout", None)
        if kwargs.get("model", None) is None and kwargs.get("engine", None) is None:
            raise InvalidRequestError(
                "Must provide an 'engine' or 'model' parameter to create an Embedding.",
                param="engine",
            )

        user_provided_encoding_format = kwargs.get("encoding_format", None)

        # If encoding format was not explicitly specified, we opaquely use base64 for performance
        if not user_provided_encoding_format:
            kwargs["encoding_format"] = "base64"

        while True:
            try:
                response = super().create(*args, **kwargs)

                # If a user specifies base64, we'll just return the encoded string.
                # This is only for the default case.
                if not user_provided_encoding_format:
                    for data in response.data:

                        # If an engine isn't using this optimization, don't do anything
                        if type(data["embedding"]) == str:
                            data["embedding"] = np.frombuffer(
                                base64.b64decode(data["embedding"]), dtype="float32"
                            ).tolist()

                return response
            except TryAgain as e:
                if timeout is not None and time.time() > start + timeout:
                    raise

                util.log_info("Waiting for model to warm up", error=e)
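A minimal usage sketch of the new resource, combining the README change above with the base64 default implemented here (the `sk-...` key is a placeholder and the engine name is taken from the README example):

```python
import base64

import numpy as np
import openai

openai.api_key = "sk-..."  # supply your API key however you choose

# Default path: the client requests base64 under the hood and transparently
# decodes it, so callers still receive a plain list of floats.
resp = openai.Embedding.create(input="sample text", engine="text-similarity-davinci-001")
embedding = resp["data"][0]["embedding"]  # list of floats

# Explicit base64: the encoded string comes back as-is and can be decoded the
# same way the library does internally.
resp = openai.Embedding.create(
    input="sample text",
    engine="text-similarity-davinci-001",
    encoding_format="base64",
)
decoded = np.frombuffer(base64.b64decode(resp["data"][0]["embedding"]), dtype="float32")
```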
openai/api_resources/engine.py: 4 changes (4 additions, 0 deletions)
@@ -1,4 +1,5 @@
import time
+import warnings

from openai import util
from openai.api_resources.abstract import ListableAPIResource, UpdateableAPIResource
@@ -29,4 +30,7 @@ def search(self, **params):
        return self.request("post", self.instance_url() + "/search", params)

    def embeddings(self, **params):
+        warnings.warn(
+            "Engine.embeddings is deprecated, use Embedding.create", DeprecationWarning
+        )
        return self.request("post", self.instance_url() + "/embeddings", params)
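A short sketch of what this deprecation means for callers (same placeholder key and engine names as above): the old `Engine.embeddings` path still works but now emits a `DeprecationWarning`, and `Embedding.create` is the replacement.

```python
import warnings

import openai

openai.api_key = "sk-..."  # placeholder

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Old path: still functional, but now warns.
    openai.Engine(id="text-similarity-davinci-001").embeddings(input="sample text")
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# New path, as introduced in this commit.
openai.Embedding.create(input="sample text", engine="text-similarity-davinci-001")
```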
(Diffs for the remaining changed files are not shown here.)