bug: fix multiple encoding formats
juliuslipp committed Apr 5, 2024
1 parent 99523c9 commit d7f4b3e
Showing 24 changed files with 165 additions and 123 deletions.
17 changes: 17 additions & 0 deletions README.md
@@ -68,6 +68,23 @@ embeddings = mxbai.embeddings(
print(embeddings)
```

By specifying the encoding format, you can, for example, use binary embeddings.

```python
from mixedbread_ai.client import MixedbreadAI
from mixedbread_ai.types import EncodingFormat

mxbai = MixedbreadAI(api_key="{YOUR_API_KEY}")

embeddings = mxbai.embeddings(
model="mixedbread-ai/mxbai-embed-large-v1",
input=["I like to eat apples.", "I like to eat bananas."],
encoding_format=[EncodingFormat.FLOAT, EncodingFormat.UBINARY]
)

print(embeddings.data[0].embedding.float_, embeddings.data[0].embedding.ubinary)
```
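
The `ubinary` format returns bit-packed unsigned 8-bit integers. As a minimal follow-up sketch (assuming numpy is installed and that the `ubinary` field is a flat list of packed uint8 values, which this diff does not spell out), the packed bytes can be expanded back into one bit per embedding dimension:

```python
import numpy as np

# Continues the example above: `embeddings` is the response from
# mxbai.embeddings(..., encoding_format=[EncodingFormat.FLOAT, EncodingFormat.UBINARY]).
packed = np.asarray(embeddings.data[0].embedding.ubinary, dtype=np.uint8)

# Each uint8 packs 8 binary dimensions; unpackbits yields one 0/1 value per dimension.
bits = np.unpackbits(packed)

print(bits.shape, bits[:16])
```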

### Reranking (Asynchronous)
Here's an asynchronous example of using the mixedbread ai SDK to rerank documents:
```python
4 changes: 4 additions & 0 deletions mixedbread_ai/__init__.py
@@ -15,12 +15,14 @@
InvalidMatryoshkaDimensionsError,
InvalidMatryoshkaModelError,
ModelNotFoundError,
MultiModalInput,
MultipleEncodingsEmbedding,
MultipleEncodingsEmbeddingItem,
MxbaiApiError,
MxbaiApiErrorDetails,
NotFoundErrorBody,
ObjectType,
Query,
RankedDocument,
RerankingResponse,
TextDocument,
@@ -61,13 +63,15 @@
"InvalidMatryoshkaModelError",
"MixedbreadAIEnvironment",
"ModelNotFoundError",
"MultiModalInput",
"MultipleEncodingsEmbedding",
"MultipleEncodingsEmbeddingItem",
"MxbaiApiError",
"MxbaiApiErrorDetails",
"NotFoundError",
"NotFoundErrorBody",
"ObjectType",
"Query",
"RankedDocument",
"RerankingResponse",
"TextDocument",
143 changes: 72 additions & 71 deletions mixedbread_ai/client.py
@@ -25,9 +25,10 @@
from .types.forbidden_error_body import ForbiddenErrorBody
from .types.input import Input
from .types.internal_error import InternalError
from .types.multi_modal_input import MultiModalInput
from .types.not_found_error_body import NotFoundErrorBody
from .types.query import Query
from .types.reranking_response import RerankingResponse
from .types.text_document import TextDocument
from .types.too_many_requests_error_body import TooManyRequestsErrorBody
from .types.truncation_strategy import TruncationStrategy
from .types.unauthorized_error_body import UnauthorizedErrorBody
@@ -82,61 +83,61 @@ def __init__(
def embeddings(
self,
*,
dimensions: typing.Optional[int] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
input: Input,
instruction: typing.Optional[str] = OMIT,
model: str,
input: MultiModalInput,
normalized: typing.Optional[bool] = OMIT,
prompt: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
truncation_strategy: typing.Optional[TruncationStrategy] = OMIT,
dimensions: typing.Optional[int] = OMIT,
instruction: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
prompt: typing.Optional[str] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> EmbeddingsResponse:
"""
Create embeddings for text or images using the specified model, encoding format, and normalization.
Parameters:
- dimensions: typing.Optional[int].
- model: str. The model to use for creating embeddings
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- input: MultiModalInput.
- input: Input.
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- instruction: typing.Optional[str].
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- model: str. The model to use for creating embeddings
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- dimensions: typing.Optional[int].
- prompt: typing.Optional[str].
- instruction: typing.Optional[str].
- texts: typing.Optional[typing.Sequence[str]].
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- prompt: typing.Optional[str].
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai.client import MixedbreadAI
client = MixedbreadAI(api_key="YOUR_API_KEY", )
client.embeddings(input="input", model="model", )
client.embeddings(model="model", input="input", )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model}
if dimensions is not OMIT:
_request["dimensions"] = dimensions
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input}
if normalized is not OMIT:
_request["normalized"] = normalized
if encoding_format is not OMIT:
_request["encoding_format"] = encoding_format
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if dimensions is not OMIT:
_request["dimensions"] = dimensions
if instruction is not OMIT:
_request["instruction"] = instruction
if normalized is not OMIT:
_request["normalized"] = normalized
if prompt is not OMIT:
_request["prompt"] = prompt
if texts is not OMIT:
_request["texts"] = texts
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if prompt is not OMIT:
_request["prompt"] = prompt
_response = self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/embeddings"),
@@ -190,38 +191,38 @@ def embeddings(
def reranking(
self,
*,
input: typing.Sequence[TextDocument],
model: str,
query: TextDocument,
return_input: typing.Optional[bool] = OMIT,
input: Input,
query: Query,
top_k: typing.Optional[int] = OMIT,
return_input: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> RerankingResponse:
"""
Parameters:
- input: typing.Sequence[TextDocument]. The input documents to rerank
- model: str. The model to use for creating embeddings
- query: TextDocument. The query to rerank the documents
- input: Input. The input documents to rerank
- return_input: typing.Optional[bool]. Whether to return the documents
- query: Query. The query to rerank the documents
- top_k: typing.Optional[int]. The number of documents to return
- return_input: typing.Optional[bool]. Whether to return the documents
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai import TextDocument
from mixedbread_ai.client import MixedbreadAI
client = MixedbreadAI(api_key="YOUR_API_KEY", )
client.reranking(input=[TextDocument(text="text", )], model="model", query=TextDocument(text="text", ), return_input=False, top_k=10, )
client.reranking(model="model", input=["input"], query=TextDocument(text="text", ), top_k=10, return_input=False, )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model, "query": query}
if return_input is not OMIT:
_request["return_input"] = return_input
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input, "query": query}
if top_k is not OMIT:
_request["top_k"] = top_k
if return_input is not OMIT:
_request["return_input"] = return_input
_response = self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/reranking"),
@@ -313,61 +314,61 @@ def __init__(
async def embeddings(
self,
*,
dimensions: typing.Optional[int] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
input: Input,
instruction: typing.Optional[str] = OMIT,
model: str,
input: MultiModalInput,
normalized: typing.Optional[bool] = OMIT,
prompt: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat] = OMIT,
truncation_strategy: typing.Optional[TruncationStrategy] = OMIT,
dimensions: typing.Optional[int] = OMIT,
instruction: typing.Optional[str] = OMIT,
texts: typing.Optional[typing.Sequence[str]] = OMIT,
prompt: typing.Optional[str] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> EmbeddingsResponse:
"""
Create embeddings for text or images using the specified model, encoding format, and normalization.
Parameters:
- dimensions: typing.Optional[int].
- model: str. The model to use for creating embeddings
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- input: MultiModalInput.
- input: Input.
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- instruction: typing.Optional[str].
- encoding_format: typing.Optional[EmbeddingsRequestEncodingFormat].
- model: str. The model to use for creating embeddings
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- normalized: typing.Optional[bool]. Whether to normalize the embeddings
- dimensions: typing.Optional[int].
- prompt: typing.Optional[str].
- instruction: typing.Optional[str].
- texts: typing.Optional[typing.Sequence[str]].
- truncation_strategy: typing.Optional[TruncationStrategy]. The truncation strategy to use for the input
- prompt: typing.Optional[str].
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai.client import AsyncMixedbreadAI
client = AsyncMixedbreadAI(api_key="YOUR_API_KEY", )
await client.embeddings(input="input", model="model", )
await client.embeddings(model="model", input="input", )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model}
if dimensions is not OMIT:
_request["dimensions"] = dimensions
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input}
if normalized is not OMIT:
_request["normalized"] = normalized
if encoding_format is not OMIT:
_request["encoding_format"] = encoding_format
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if dimensions is not OMIT:
_request["dimensions"] = dimensions
if instruction is not OMIT:
_request["instruction"] = instruction
if normalized is not OMIT:
_request["normalized"] = normalized
if prompt is not OMIT:
_request["prompt"] = prompt
if texts is not OMIT:
_request["texts"] = texts
if truncation_strategy is not OMIT:
_request["truncation_strategy"] = truncation_strategy
if prompt is not OMIT:
_request["prompt"] = prompt
_response = await self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/embeddings"),
@@ -421,38 +422,38 @@ async def embeddings(
async def reranking(
self,
*,
input: typing.Sequence[TextDocument],
model: str,
query: TextDocument,
return_input: typing.Optional[bool] = OMIT,
input: Input,
query: Query,
top_k: typing.Optional[int] = OMIT,
return_input: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> RerankingResponse:
"""
Parameters:
- input: typing.Sequence[TextDocument]. The input documents to rerank
- model: str. The model to use for creating embeddings
- query: TextDocument. The query to rerank the documents
- input: Input. The input documents to rerank
- return_input: typing.Optional[bool]. Whether to return the documents
- query: Query. The query to rerank the documents
- top_k: typing.Optional[int]. The number of documents to return
- return_input: typing.Optional[bool]. Whether to return the documents
- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
---
from mixedbread_ai import TextDocument
from mixedbread_ai.client import AsyncMixedbreadAI
client = AsyncMixedbreadAI(api_key="YOUR_API_KEY", )
await client.reranking(input=[TextDocument(text="text", )], model="model", query=TextDocument(text="text", ), return_input=False, top_k=10, )
await client.reranking(model="model", input=["input"], query=TextDocument(text="text", ), top_k=10, return_input=False, )
"""
_request: typing.Dict[str, typing.Any] = {"input": input, "model": model, "query": query}
if return_input is not OMIT:
_request["return_input"] = return_input
_request: typing.Dict[str, typing.Any] = {"model": model, "input": input, "query": query}
if top_k is not OMIT:
_request["top_k"] = top_k
if return_input is not OMIT:
_request["return_input"] = return_input
_response = await self._client_wrapper.httpx_client.request(
"POST",
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "v1/reranking"),
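
For orientation, here is a minimal asynchronous reranking sketch that follows the reordered signature and the docstring example above; the API key, model name, and documents are placeholders, and `asyncio.run` is assumed as the entry point:

```python
import asyncio

from mixedbread_ai import TextDocument
from mixedbread_ai.client import AsyncMixedbreadAI


async def main() -> None:
    # Placeholder credentials; substitute real values.
    client = AsyncMixedbreadAI(api_key="{YOUR_API_KEY}")

    response = await client.reranking(
        model="mixedbread-ai/mxbai-rerank-large-v1",  # placeholder model name
        input=["I like to eat apples.", "I like to eat bananas."],
        query=TextDocument(text="Which sentence mentions bananas?"),
        top_k=2,
        return_input=False,
    )
    print(response)


asyncio.run(main())
```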
4 changes: 4 additions & 0 deletions mixedbread_ai/types/__init__.py
@@ -14,12 +14,14 @@
from .invalid_matryoshka_dimensions_error import InvalidMatryoshkaDimensionsError
from .invalid_matryoshka_model_error import InvalidMatryoshkaModelError
from .model_not_found_error import ModelNotFoundError
from .multi_modal_input import MultiModalInput
from .multiple_encodings_embedding import MultipleEncodingsEmbedding
from .multiple_encodings_embedding_item import MultipleEncodingsEmbeddingItem
from .mxbai_api_error import MxbaiApiError
from .mxbai_api_error_details import MxbaiApiErrorDetails
from .not_found_error_body import NotFoundErrorBody
from .object_type import ObjectType
from .query import Query
from .ranked_document import RankedDocument
from .reranking_response import RerankingResponse
from .text_document import TextDocument
@@ -45,12 +47,14 @@
"InvalidMatryoshkaDimensionsError",
"InvalidMatryoshkaModelError",
"ModelNotFoundError",
"MultiModalInput",
"MultipleEncodingsEmbedding",
"MultipleEncodingsEmbeddingItem",
"MxbaiApiError",
"MxbaiApiErrorDetails",
"NotFoundErrorBody",
"ObjectType",
"Query",
"RankedDocument",
"RerankingResponse",
"TextDocument",
4 changes: 2 additions & 2 deletions mixedbread_ai/types/bad_request_error_body.py
@@ -14,9 +14,9 @@

class BadRequestErrorBody(pydantic.BaseModel):
type: typing.Optional[typing.Literal["bad_request_error"]] = None
details: typing.Optional[MxbaiApiErrorDetails] = None
message: typing.Optional[str] = None
url: typing.Optional[str] = None
message: typing.Optional[str] = None
details: typing.Optional[MxbaiApiErrorDetails] = None

def json(self, **kwargs: typing.Any) -> str:
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}