bug: fix multiple encoding formats
juliuslipp committed Apr 5, 2024
1 parent 99523c9 commit d7f4b3e
Showing 24 changed files with 165 additions and 123 deletions.
17 changes: 17 additions & 0 deletions README.md
@@ -68,6 +68,23 @@ embeddings = mxbai.embeddings(
print(embeddings)
```

By specifying the encoding format, you can, for example, use binary embeddings.

```python
from mixedbread_ai.client import MixedbreadAI
from mixedbread_ai.types import EncodingFormat

mxbai = MixedbreadAI(api_key="{YOUR_API_KEY}")

embeddings = mxbai.embeddings(
model="mixedbread-ai/mxbai-embed-large-v1",
input=["I like to eat apples.", "I like to eat bananas."],
encoding_format=[EncodingFormat.FLOAT, EncodingFormat.UBINARY]
)

print(embeddings.data[0].embedding.float_, embeddings.data[0].embedding.ubinary)
```
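
The `ubinary` format returns bit-packed unsigned 8-bit integers. As a minimal follow-up sketch (assuming numpy is installed and that the `ubinary` field is a flat list of packed uint8 values, which this diff does not spell out), the packed bytes can be expanded back into one bit per embedding dimension:

```python
import numpy as np

# Continues the example above: `embeddings` is the response from
# mxbai.embeddings(..., encoding_format=[EncodingFormat.FLOAT, EncodingFormat.UBINARY]).
packed = np.asarray(embeddings.data[0].embedding.ubinary, dtype=np.uint8)

# Each uint8 packs 8 binary dimensions; unpackbits yields one 0/1 value per dimension.
bits = np.unpackbits(packed)

print(bits.shape, bits[:16])
```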

### Reranking (Asynchronous)
Here's an asynchronous example of using the mixedbread ai SDK to rerank documents:
```python
4 changes: 4 additions & 0 deletions mixedbread_ai/__init__.py
@@ -15,12 +15,14 @@
InvalidMatryoshkaDimensionsError,
InvalidMatryoshkaModelError,
ModelNotFoundError,
MultiModalInput,
MultipleEncodingsEmbedding,
MultipleEncodingsEmbeddingItem,
MxbaiApiError,
MxbaiApiErrorDetails,
NotFoundErrorBody,
ObjectType,
Query,
RankedDocument,
RerankingResponse,
TextDocument,
@@ -61,13 +63,15 @@
"InvalidMatryoshkaModelError",
"MixedbreadAIEnvironment",
"ModelNotFoundError",
"MultiModalInput",
"MultipleEncodingsEmbedding",
"MultipleEncodingsEmbeddingItem",
"MxbaiApiError",
"MxbaiApiErrorDetails",
"NotFoundError",
"NotFoundErrorBody",
"ObjectType",
"Query",
"RankedDocument",
"RerankingResponse",
"TextDocument",
143 changes: 72 additions & 71 deletions mixedbread_ai/client.py
@@ -25,9 +25,10 @@
from .types.forbidden_error_body import ForbiddenErrorBody
from .types.input import Input
from .types.internal_error import InternalError
from .types.multi_modal_input import MultiModalInput
from .types.not_found_error_body import NotFoundErrorBody
from .types.query import Query
from .types.reranking_response import RerankingResponse
from .types.text_document import TextDocument
from .types.too_many_requests_error_body import TooManyRequestsErrorBody
from .types.truncation_strategy import TruncationStrategy
from .types.unauthorized_error_body import UnauthorizedErrorBody
@@ -82,61 +83,61 @@ def __init__(
def embeddings(
self,
*,
dimensions: typing.Optional[int] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
input: Input,
instruction: typing.Optional[str] = OMIT,
model: str,
input: MultiModalInput,
normalized: typing.Optional[bool] = OMIT,
prompt: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
truncation_strategy: typing.Optional[TruncationStrategy] = OMIT,
dimensions: typing.Optional[int] = OMIT,
instruction: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
prompt: typing.Optional[str] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> EmbeddingsResponse:
"""
Create embeddings for text or images using the specified model, encoding format, and normalization.
Parameters:
- dimensions: typing.Optional[int].
- model: str. The model to use for creating embeddings
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- input: MultiModalInput.
- input: Input.
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- instruction: typing.Optional[str].
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- model: str. The model to use for creating embeddings
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- dimensions: typing.Optional[int].
- prompt: typing.Optional[str].
- instruction: typing.Optional[str].
- texts: typing.Optional[typing.Sequence[str]].
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- prompt: typing.Optional[str].
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai.client import MixedbreadAI
client = MixedbreadAI(api_key="YOUR_API_KEY", )
client.embeddings(input="input", model="model", )
client.embeddings(model="model", input="input", )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model}
if dimensions is not OMIT:
_request["dimensions"] = dimensions
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input}
if normalized is not OMIT:
_request["normalized"] = normalized
if encoding_format is not OMIT:
_request["encoding_format"] = encoding_format
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if dimensions is not OMIT:
_request["dimensions"] = dimensions
if instruction is not OMIT:
_request["instruction"] = instruction
if normalized is not OMIT:
_request["normalized"] = normalized
if prompt is not OMIT:
_request["prompt"] = prompt
if texts is not OMIT:
_request["texts"] = texts
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if prompt is not OMIT:
_request["prompt"] = prompt
_response = self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/embeddings"),
@@ -190,38 +191,38 @@ def embeddings(
def reranking(
self,
*,
input: typing.Sequence[TextDocument],
model: str,
query: TextDocument,
return_input: typing.Optional[bool] = OMIT,
input: Input,
query: Query,
top_k: typing.Optional[int] = OMIT,
return_input: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> RerankingResponse:
"""
Parameters:
- input: typing.Sequence[TextDocument]. The input documents to rerank
- model: str. The model to use for creating embeddings
- query: TextDocument. The query to rerank the documents
- input: Input. The input documents to rerank
- return_input: typing.Optional[bool]. Whether to return the documents
- query: Query. The query to rerank the documents
- top_k: typing.Optional[int]. The number of documents to return
- return_input: typing.Optional[bool]. Whether to return the documents
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai import TextDocument
from mixedbread_ai.client import MixedbreadAI
client = MixedbreadAI(api_key="YOUR_API_KEY", )
client.reranking(input=[TextDocument(text="text", )], model="model", query=TextDocument(text="text", ), return_input=False, top_k=10, )
client.reranking(model="model", input=["input"], query=TextDocument(text="text", ), top_k=10, return_input=False, )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model, "query": query}
if return_input is not OMIT:
_request["return_input"] = return_input
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input, "query": query}
if top_k is not OMIT:
_request["top_k"] = top_k
if return_input is not OMIT:
_request["return_input"] = return_input
_response = self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/reranking"),
@@ -313,61 +314,61 @@ def __init__(
async def embeddings(
self,
*,
dimensions: typing.Optional[int] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
input: Input,
instruction: typing.Optional[str] = OMIT,
model: str,
input: MultiModalInput,
normalized: typing.Optional[bool] = OMIT,
prompt: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
truncation_strategy: typing.Optional[TruncationStrategy] = OMIT,
dimensions: typing.Optional[int] = OMIT,
instruction: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
prompt: typing.Optional[str] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> EmbeddingsResponse:
"""
Create embeddings for text or images using the specified model, encoding format, and normalization.
Parameters:
- dimensions: typing.Optional[int].
- model: str. The model to use for creating embeddings
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- input: MultiModalInput.
- input: Input.
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- instruction: typing.Optional[str].
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- model: str. The model to use for creating embeddings
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- dimensions: typing.Optional[int].
- prompt: typing.Optional[str].
- instruction: typing.Optional[str].
- texts: typing.Optional[typing.Sequence[str]].
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- prompt: typing.Optional[str].
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai.client import AsyncMixedbreadAI
client = AsyncMixedbreadAI(api_key="YOUR_API_KEY", )
await client.embeddings(input="input", model="model", )
await client.embeddings(model="model", input="input", )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model}
if dimensions is not OMIT:
_request["dimensions"] = dimensions
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input}
if normalized is not OMIT:
_request["normalized"] = normalized
if encoding_format is not OMIT:
_request["encoding_format"] = encoding_format
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if dimensions is not OMIT:
_request["dimensions"] = dimensions
if instruction is not OMIT:
_request["instruction"] = instruction
if normalized is not OMIT:
_request["normalized"] = normalized
if prompt is not OMIT:
_request["prompt"] = prompt
if texts is not OMIT:
_request["texts"] = texts
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if prompt is not OMIT:
_request["prompt"] = prompt
_response = await self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/embeddings"),
@@ -421,38 +422,38 @@ async def embeddings(
async def reranking(
self,
*,
input: typing.Sequence[TextDocument],
model: str,
query: TextDocument,
return_input: typing.Optional[bool] = OMIT,
input: Input,
query: Query,
top_k: typing.Optional[int] = OMIT,
return_input: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> RerankingResponse:
"""
Parameters:
- input: typing.Sequence[TextDocument]. The input documents to rerank
- model: str. The model to use for creating embeddings
- query: TextDocument. The query to rerank the documents
- input: Input. The input documents to rerank
- return_input: typing.Optional[bool]. Whether to return the documents
- query: Query. The query to rerank the documents
- top_k: typing.Optional[int]. The number of documents to return
- return_input: typing.Optional[bool]. Whether to return the documents
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai import TextDocument
from mixedbread_ai.client import AsyncMixedbreadAI
client = AsyncMixedbreadAI(api_key="YOUR_API_KEY", )
await client.reranking(input=[TextDocument(text="text", )], model="model", query=TextDocument(text="text", ), return_input=False, top_k=10, )
await client.reranking(model="model", input=["input"], query=TextDocument(text="text", ), top_k=10, return_input=False, )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model, "query": query}
if return_input is not OMIT:
_request["return_input"] = return_input
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input, "query": query}
if top_k is not OMIT:
_request["top_k"] = top_k
if return_input is not OMIT:
_request["return_input"] = return_input
_response = await self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/reranking"),
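
For orientation, here is a minimal asynchronous reranking sketch that follows the reordered signature and the docstring example above; the API key, model name, and documents are placeholders, and `asyncio.run` is assumed as the entry point:

```python
import asyncio

from mixedbread_ai import TextDocument
from mixedbread_ai.client import AsyncMixedbreadAI


async def main() -> None:
    # Placeholder credentials; substitute real values.
    client = AsyncMixedbreadAI(api_key="{YOUR_API_KEY}")

    response = await client.reranking(
        model="mixedbread-ai/mxbai-rerank-large-v1",  # placeholder model name
        input=["I like to eat apples.", "I like to eat bananas."],
        query=TextDocument(text="Which sentence mentions bananas?"),
        top_k=2,
        return_input=False,
    )
    print(response)


asyncio.run(main())
```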
4 changes: 4 additions & 0 deletions mixedbread_ai/types/__init__.py
@@ -14,12 +14,14 @@
from .invalid_matryoshka_dimensions_error import InvalidMatryoshkaDimensionsError
from .invalid_matryoshka_model_error import InvalidMatryoshkaModelError
from .model_not_found_error import ModelNotFoundError
from .multi_modal_input import MultiModalInput
from .multiple_encodings_embedding import MultipleEncodingsEmbedding
from .multiple_encodings_embedding_item import MultipleEncodingsEmbeddingItem
from .mxbai_api_error import MxbaiApiError
from .mxbai_api_error_details import MxbaiApiErrorDetails
from .not_found_error_body import NotFoundErrorBody
from .object_type import ObjectType
from .query import Query
from .ranked_document import RankedDocument
from .reranking_response import RerankingResponse
from .text_document import TextDocument
@@ -45,12 +47,14 @@
"InvalidMatryoshkaDimensionsError",
"InvalidMatryoshkaModelError",
"ModelNotFoundError",
"MultiModalInput",
"MultipleEncodingsEmbedding",
"MultipleEncodingsEmbeddingItem",
"MxbaiApiError",
"MxbaiApiErrorDetails",
"NotFoundErrorBody",
"ObjectType",
"Query",
"RankedDocument",
"RerankingResponse",
"TextDocument",
4 changes: 2 additions & 2 deletions mixedbread_ai/types/bad_request_error_body.py
@@ -14,9 +14,9 @@

class BadRequestErrorBody(pydantic.BaseModel):
type: typing.Optional[typing.Literal["bad_request_error"]] = None
details: typing.Optional[MxbaiApiErrorDetails] = None
message: typing.Optional[str] = None
url: typing.Optional[str] = None
message: typing.Optional[str] = None
details: typing.Optional[MxbaiApiErrorDetails] = None

def json(self, **kwargs: typing.Any) -> str:
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}