Version 11.6 diff (#66)
* Hallacy/pickleable exceptions (#109)

* Exceptions in Python are hard to pickle; make exceptions pickleable (see the sketch after this list)

* Remove debug

* Added tests

* nit

* Change embeddings call to openai.Embedding.create (#110)

* Change embeddings call to openai.Embedding.create

* And update model names

* Untweak notebook

* Support encoded embeddings response (#111)

* Support encoded embeddings response

* Removed imports

* Add a couple of comments

* Typo

* Remove CA bundle (#112)

* Remove CA bundle

* Removed some references
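To illustrate the pickling fix described in the first bullet, here is a minimal sketch using toy exception classes (these are not the library's actual error types): an exception whose constructor takes extra required arguments breaks on unpickling, and defining `__reduce__` fixes it.

```python
import pickle


class BrokenError(Exception):
    """Toy example: an extra required argument that pickle does not know about."""

    def __init__(self, message, param):
        super().__init__(message)  # only `message` ends up in self.args
        self.param = param


class PicklableError(Exception):
    """Same shape, but tells pickle how to rebuild itself."""

    def __init__(self, message, param):
        super().__init__(message)
        self.param = param

    def __reduce__(self):
        # Rebuild by calling the class with both constructor arguments.
        return type(self), (self.args[0], self.param)


try:
    pickle.loads(pickle.dumps(BrokenError("bad request", "engine")))
except TypeError as exc:
    print("round-trip failed:", exc)  # __init__() is missing the `param` argument

restored = pickle.loads(pickle.dumps(PicklableError("bad request", "engine")))
print(restored.param)  # -> engine
```

The same idea applies to any error class whose constructor takes more than just a message.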
hallacy committed Jan 21, 2022
1 parent 7ea5dde commit f4be8f2
Showing 13 changed files with 136 additions and 3,255 deletions.
README.md: 4 changes (2 additions, 2 deletions)
@@ -84,10 +84,10 @@ openai.api_key = "sk-..." # supply your API key however you choose
text_string = "sample text"

# choose an embedding
-model_id = "davinci-similarity"
+model_id = "text-similarity-davinci-001"

# compute the embedding of the text
-embedding = openai.Engine(id=model_id).embeddings(input=text_string)['data'][0]['embedding']
+embedding = openai.Embedding.create(input=text_string, engine=model_id)['data'][0]['embedding']
```

An example of how to call the embeddings method is shown in the [get embeddings notebook](https://github.com/openai/openai-python/blob/main/examples/embeddings/Get_embeddings.ipynb).
examples/embeddings/Get_embeddings.ipynb: 11 changes (6 additions, 5 deletions)
@@ -28,7 +28,7 @@
"source": [
"import openai\n",
"\n",
-"embedding = openai.Engine(id=\"davinci-similarity\").embeddings(input=\"Sample document text goes here\")['data'][0]['embedding']\n",
+"embedding = openai.Embedding.create(input=\"Sample document text goes here\", engine=\"text-similarity-davinci-001\")['data'][0]['embedding']\n",
"len(embedding)"
]
},
@@ -50,14 +50,15 @@
"from tenacity import retry, wait_random_exponential, stop_after_attempt\n",
"\n",
"@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))\n",
-"def get_embedding(text, engine=\"davinci-similarity\"):\n",
+"def get_embedding(text, engine=\"text-similarity-davinci-001\"):\n",
"\n",
"\n",
"    # replace newlines, which can negatively affect performance.\n",
"    text = text.replace(\"\\n\", \" \")\n",
"\n",
-"    return openai.Engine(id=engine).embeddings(input = [text])['data'][0]['embedding']\n",
+"    return openai.Embedding.create(input=[text], engine=engine)['data'][0]['embedding']\n",
"\n",
-"embedding = get_embedding(\"Sample query text goes here\", engine=\"ada-search-query\")\n",
+"embedding = get_embedding(\"Sample query text goes here\", engine=\"text-search-ada-query-001\")\n",
"print(len(embedding))"
]
},
@@ -75,7 +76,7 @@
}
],
"source": [
-"embedding = get_embedding(\"Sample document text goes here\", engine=\"ada-search-document\")\n",
+"embedding = get_embedding(\"Sample document text goes here\", engine=\"text-search-ada-doc-001\")\n",
"print(len(embedding))"
]
}
openai/__init__.py: 4 changes (3 additions, 1 deletion)
@@ -9,6 +9,7 @@
    Answer,
    Classification,
    Completion,
+    Embedding,
    Engine,
    ErrorObject,
    File,
@@ -31,7 +32,7 @@
proxy = None
app_info = None
enable_telemetry = False # Ignored; the telemetry feature was removed.
-ca_bundle_path = os.path.join(os.path.dirname(__file__), "data/ca-certificates.crt")
+ca_bundle_path = None # No longer used, feature was removed
debug = False
log = None # Set to either 'debug' or 'info', controls console logging

@@ -40,6 +41,7 @@
    "Answer",
    "Classification",
    "Completion",
+    "Embedding",
    "Engine",
    "ErrorObject",
    "File",
openai/api_requestor.py: 1 change (0 additions, 1 deletion)
@@ -49,7 +49,6 @@ def _make_session() -> requests.Session:
    proxies = _requests_proxies_arg(openai.proxy)
    if proxies:
        s.proxies = proxies
-    s.verify = openai.ca_bundle_path
    s.mount(
        "https://",
        requests.adapters.HTTPAdapter(max_retries=MAX_CONNECTION_RETRIES),
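With the bundled CA file removed, `requests` falls back to its default certificate verification. If a custom bundle is still needed, one possible workaround (an assumption about your environment, not something this commit provides) is the standard `requests` environment variable:

```python
# Hypothetical workaround, not part of this commit: point `requests` (which the
# client uses for HTTP) at your own CA bundle via its standard environment variable.
import os

os.environ["REQUESTS_CA_BUNDLE"] = "/path/to/your/ca-bundle.crt"  # hypothetical path
```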
openai/api_resources/__init__.py: 1 change (1 addition, 0 deletions)
@@ -1,6 +1,7 @@
from openai.api_resources.answer import Answer # noqa: F401
from openai.api_resources.classification import Classification # noqa: F401
from openai.api_resources.completion import Completion # noqa: F401
+from openai.api_resources.embedding import Embedding # noqa: F401
from openai.api_resources.engine import Engine # noqa: F401
from openai.api_resources.error_object import ErrorObject # noqa: F401
from openai.api_resources.file import File # noqa: F401
openai/api_resources/embedding.py: 58 changes (58 additions, 0 deletions)
@@ -0,0 +1,58 @@
import base64
import time

import numpy as np

from openai import util
from openai.api_resources.abstract import DeletableAPIResource, ListableAPIResource
from openai.api_resources.abstract.engine_api_resource import EngineAPIResource
from openai.error import InvalidRequestError, TryAgain


class Embedding(EngineAPIResource, ListableAPIResource, DeletableAPIResource):
    engine_required = True
    OBJECT_NAME = "embedding"

    @classmethod
    def create(cls, *args, **kwargs):
        """
        Creates a new embedding for the provided input and parameters.
        See https://beta.openai.com/docs/api-reference/embeddings for a list
        of valid parameters.
        """
        start = time.time()
        timeout = kwargs.pop("timeout", None)
        if kwargs.get("model", None) is None and kwargs.get("engine", None) is None:
            raise InvalidRequestError(
                "Must provide an 'engine' or 'model' parameter to create an Embedding.",
                param="engine",
            )

        user_provided_encoding_format = kwargs.get("encoding_format", None)

        # If encoding format was not explicitly specified, we opaquely use base64 for performance
        if not user_provided_encoding_format:
            kwargs["encoding_format"] = "base64"

        while True:
            try:
                response = super().create(*args, **kwargs)

                # If a user specifies base64, we'll just return the encoded string.
                # This is only for the default case.
                if not user_provided_encoding_format:
                    for data in response.data:

                        # If an engine isn't using this optimization, don't do anything
                        if type(data["embedding"]) == str:
                            data["embedding"] = np.frombuffer(
                                base64.b64decode(data["embedding"]), dtype="float32"
                            ).tolist()

                return response
            except TryAgain as e:
                if timeout is not None and time.time() > start + timeout:
                    raise

                util.log_info("Waiting for model to warm up", error=e)
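A minimal usage sketch of the new resource, combining the README change above with the base64 default implemented here (the `sk-...` key is a placeholder and the engine name is taken from the README example):

```python
import base64

import numpy as np
import openai

openai.api_key = "sk-..."  # supply your API key however you choose

# Default path: the client requests base64 under the hood and transparently
# decodes it, so callers still receive a plain list of floats.
resp = openai.Embedding.create(input="sample text", engine="text-similarity-davinci-001")
embedding = resp["data"][0]["embedding"]  # list of floats

# Explicit base64: the encoded string comes back as-is and can be decoded the
# same way the library does internally.
resp = openai.Embedding.create(
    input="sample text",
    engine="text-similarity-davinci-001",
    encoding_format="base64",
)
decoded = np.frombuffer(base64.b64decode(resp["data"][0]["embedding"]), dtype="float32")
```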
openai/api_resources/engine.py: 4 changes (4 additions, 0 deletions)
@@ -1,4 +1,5 @@
import time
+import warnings

from openai import util
from openai.api_resources.abstract import ListableAPIResource, UpdateableAPIResource
@@ -29,4 +30,7 @@ def search(self, **params):
        return self.request("post", self.instance_url() + "/search", params)

    def embeddings(self, **params):
+        warnings.warn(
+            "Engine.embeddings is deprecated, use Embedding.create", DeprecationWarning
+        )
        return self.request("post", self.instance_url() + "/embeddings", params)
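A short sketch of what this deprecation means for callers (same placeholder key and engine names as above): the old `Engine.embeddings` path still works but now emits a `DeprecationWarning`, and `Embedding.create` is the replacement.

```python
import warnings

import openai

openai.api_key = "sk-..."  # placeholder

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Old path: still functional, but now warns.
    openai.Engine(id="text-similarity-davinci-001").embeddings(input="sample text")
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# New path, as introduced in this commit.
openai.Embedding.create(input="sample text", engine="text-similarity-davinci-001")
```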
(Diffs for the remaining changed files are not shown here.)