From 5cae098424ac1dacaeb9bc0657a4a158ee4409af Mon Sep 17 00:00:00 2001 From: Johnny Chadda Date: Wed, 23 Apr 2025 16:03:15 +0200 Subject: [PATCH 1/3] Add support for embeddings --- examples/async_embeddings_example.py | 45 +++++++++++++ examples/embeddings_example.py | 34 ++++++++++ src/opperai/__init__.py | 4 +- src/opperai/_client.py | 6 ++ src/opperai/_opper.py | 4 ++ src/opperai/core/embeddings/__init__.py | 1 + .../core/embeddings/_async_embeddings.py | 64 +++++++++++++++++++ src/opperai/core/embeddings/_embeddings.py | 64 +++++++++++++++++++ src/opperai/embeddings/__init__.py | 1 + src/opperai/embeddings/async_embeddings.py | 53 +++++++++++++++ src/opperai/embeddings/embeddings.py | 53 +++++++++++++++ src/opperai/evaluations/decorator.py | 61 ++++++++++++++++++ src/opperai/types/__init__.py | 1 + src/opperai/types/embeddings.py | 21 ++++++ 14 files changed, 411 insertions(+), 1 deletion(-) create mode 100644 examples/async_embeddings_example.py create mode 100644 examples/embeddings_example.py create mode 100644 src/opperai/core/embeddings/__init__.py create mode 100644 src/opperai/core/embeddings/_async_embeddings.py create mode 100644 src/opperai/core/embeddings/_embeddings.py create mode 100644 src/opperai/embeddings/__init__.py create mode 100644 src/opperai/embeddings/async_embeddings.py create mode 100644 src/opperai/embeddings/embeddings.py create mode 100644 src/opperai/evaluations/decorator.py create mode 100644 src/opperai/types/embeddings.py diff --git a/examples/async_embeddings_example.py b/examples/async_embeddings_example.py new file mode 100644 index 0000000..23fd610 --- /dev/null +++ b/examples/async_embeddings_example.py @@ -0,0 +1,45 @@ +import asyncio +import os + +from opperai import AsyncOpper + + +async def main(): + # Initialize the AsyncOpper client + api_key = os.getenv("OPPER_API_KEY") + opper = AsyncOpper(api_key=api_key) + + # Example 1: Generate embeddings for a single string + print("Example 1: Single text input") + response = await opper.embeddings.create( + model="text-embedding-ada-002", input_text="Hello, world!" + ) + + # Print the embedding vector for the input + print(f"Model used: {response.model}") + print(f"Embedding dimension: {len(response.data[0]['embedding'])}") + print(f"First few values: {response.data[0]['embedding'][:5]}") + print(f"Usage: {response.usage}") + + # Example 2: Generate embeddings for multiple strings (batch processing) + print("\nExample 2: List of texts input") + batch_response = await opper.embeddings.create( + model="text-embedding-ada-002", + input_text=[ + "Hello, world!", + "How are you?", + "Machine learning is fascinating.", + ], + ) + + # Print the embedding vectors information + print(f"Number of embeddings: {len(batch_response.data)}") + for i, embedding_data in enumerate(batch_response.data): + print(f"Embedding {i+1} dimension: {len(embedding_data['embedding'])}") + + # Print total token usage + print(f"Total token usage: {batch_response.usage}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/embeddings_example.py b/examples/embeddings_example.py new file mode 100644 index 0000000..ae35eb5 --- /dev/null +++ b/examples/embeddings_example.py @@ -0,0 +1,34 @@ +import os + +from opperai import Opper + +# Initialize the Opper client +api_key = os.getenv("OPPER_API_KEY") +opper = Opper(api_key=api_key) + +# Example 1: Generate embeddings for a single string +print("Example 1: Single text input") +response = opper.embeddings.create( + model="text-embedding-ada-002", input_text="Hello, world!" +) + +# Print the embedding vector for the input +print(f"Model used: {response.model}") +print(f"Embedding dimension: {len(response.data[0]['embedding'])}") +print(f"First few values: {response.data[0]['embedding'][:5]}") +print(f"Usage: {response.usage}") + +# Example 2: Generate embeddings for multiple strings (batch processing) +print("\nExample 2: List of texts input") +batch_response = opper.embeddings.create( + model="text-embedding-ada-002", + input_text=["Hello, world!", "How are you?", "Machine learning is fascinating."], +) + +# Print the embedding vectors information +print(f"Number of embeddings: {len(batch_response.data)}") +for i, embedding_data in enumerate(batch_response.data): + print(f"Embedding {i+1} dimension: {len(embedding_data['embedding'])}") + +# Print total token usage +print(f"Total token usage: {batch_response.usage}") diff --git a/src/opperai/__init__.py b/src/opperai/__init__.py index 099d56e..f369392 100644 --- a/src/opperai/__init__.py +++ b/src/opperai/__init__.py @@ -1,5 +1,8 @@ # ruff: noqa: F401 +from opperai.__version__ import __version__ from opperai.core.spans._decorator import start_span, trace +from opperai.embeddings.async_embeddings import AsyncEmbeddings +from opperai.embeddings.embeddings import Embeddings from opperai.functions.async_functions import AsyncFunctions from opperai.functions.decorator._decorator import fn, get_last_span_id from opperai.functions.functions import Functions @@ -8,6 +11,5 @@ from opperai.spans.async_spans import AsyncSpans from opperai.spans.spans import Spans -from opperai.__version__ import __version__ from ._client import AsyncClient, Client from ._opper import AsyncOpper, Opper diff --git a/src/opperai/_client.py b/src/opperai/_client.py index 4d959d5..093dd50 100644 --- a/src/opperai/_client.py +++ b/src/opperai/_client.py @@ -5,6 +5,8 @@ from opperai.core.datasets._async_datasets import AsyncDatasets from opperai.core.datasets._datasets import Datasets +from opperai.core.embeddings._async_embeddings import AsyncEmbeddings +from opperai.core.embeddings._embeddings import Embeddings from opperai.core.functions._async_functions import AsyncFunctions from opperai.core.functions._functions import Functions from opperai.core.indexes._async_indexes import AsyncIndexes @@ -31,6 +33,7 @@ class AsyncClient: indexes: AsyncIndexes = None spans: AsyncSpans = None datasets: AsyncDatasets = None + embeddings: AsyncEmbeddings = None def __init__( self, @@ -59,6 +62,7 @@ def __init__( self.indexes = AsyncIndexes(self.http_client) self.spans = AsyncSpans(self.http_client) self.datasets = AsyncDatasets(self.http_client) + self.embeddings = AsyncEmbeddings(self.http_client) async def generate_image( self, @@ -124,6 +128,7 @@ class Client: indexes: Indexes spans: Spans datasets: Datasets + embeddings: Embeddings def __init__( self, @@ -152,6 +157,7 @@ def __init__( self.indexes = Indexes(self.http_client) self.spans = Spans(self.http_client) self.datasets = Datasets(self.http_client) + self.embeddings = Embeddings(self.http_client) def generate_image( self, diff --git a/src/opperai/_opper.py b/src/opperai/_opper.py index 38770cf..8cfefac 100644 --- a/src/opperai/_opper.py +++ b/src/opperai/_opper.py @@ -1,5 +1,7 @@ from typing import Any, Optional, Tuple +from opperai.embeddings.async_embeddings import AsyncEmbeddings +from opperai.embeddings.embeddings import Embeddings from opperai.functions.async_functions import AsyncFunctions from opperai.functions.functions import Functions from opperai.indexes.async_indexes import AsyncIndexes @@ -32,6 +34,7 @@ def __init__( self.indexes: Indexes = Indexes(client) self.spans: Spans = Spans(client) # deprecated self.traces: Spans = self.spans + self.embeddings: Embeddings = Embeddings(client) self.call = self.functions.call @@ -54,4 +57,5 @@ def __init__( self.indexes: AsyncIndexes = AsyncIndexes(client) self.spans: AsyncSpans = AsyncSpans(client) # deprecated self.traces: AsyncSpans = self.spans + self.embeddings: AsyncEmbeddings = AsyncEmbeddings(client) self.call = self.functions.call diff --git a/src/opperai/core/embeddings/__init__.py b/src/opperai/core/embeddings/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/src/opperai/core/embeddings/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/opperai/core/embeddings/_async_embeddings.py b/src/opperai/core/embeddings/_async_embeddings.py new file mode 100644 index 0000000..2918e7a --- /dev/null +++ b/src/opperai/core/embeddings/_async_embeddings.py @@ -0,0 +1,64 @@ +from http import HTTPStatus +from typing import List, Optional, Union + +from opperai.core._http_clients import _async_http_client +from opperai.types import EmbeddingRequest, EmbeddingResponse +from opperai.types.exceptions import APIError + + +class AsyncEmbeddings: + def __init__(self, http_client: _async_http_client): + self.http_client = http_client + + async def create( + self, + model: str, + input_text: Union[str, List[str]], + encoding_format: Optional[str] = None, + user: Optional[str] = None, + ) -> EmbeddingResponse: + """Create embeddings for the given input text asynchronously. + + Args: + model (str): The ID of the model to use for generating embeddings. + input_text (Union[str, List[str]]): The input text to obtain embeddings for. + This can be a single string or a list of strings. + encoding_format (str, optional): The format for the embedding vector data. + Defaults to None. + user (str, optional): A unique identifier for the end-user. Defaults to None. + + Returns: + EmbeddingResponse: An object containing the generated embeddings. + + Raises: + APIError: If the embeddings creation fails due to an API error. + + Examples: + >>> from opperai import AsyncClient + >>> client = AsyncClient(api_key="your_api_key_here") + >>> response = await client.embeddings.create( + ... model="text-embedding-ada-002", + ... input_text="Hello, world!" + ... ) + >>> print(response.data[0]) + [0.0023064255, -0.009327292, ...] + """ + request = EmbeddingRequest( + model=model, + input=input_text, + encoding_format=encoding_format, + user=user, + ) + + response = await self.http_client.do_request( + "POST", + "/v1/embeddings", + json=request.model_dump(exclude_none=True), + ) + + if response.status_code != HTTPStatus.OK: + raise APIError( + f"Failed to create embeddings with status {response.status_code}: {response.text}" + ) + + return EmbeddingResponse.model_validate(response.json()) diff --git a/src/opperai/core/embeddings/_embeddings.py b/src/opperai/core/embeddings/_embeddings.py new file mode 100644 index 0000000..46ec144 --- /dev/null +++ b/src/opperai/core/embeddings/_embeddings.py @@ -0,0 +1,64 @@ +from http import HTTPStatus +from typing import List, Optional, Union + +from opperai.core._http_clients import _http_client +from opperai.types import EmbeddingRequest, EmbeddingResponse +from opperai.types.exceptions import APIError + + +class Embeddings: + def __init__(self, http_client: _http_client): + self.http_client = http_client + + def create( + self, + model: str, + input_text: Union[str, List[str]], + encoding_format: Optional[str] = None, + user: Optional[str] = None, + ) -> EmbeddingResponse: + """Create embeddings for the given input text. + + Args: + model (str): The ID of the model to use for generating embeddings. + input_text (Union[str, List[str]]): The input text to obtain embeddings for. + This can be a single string or a list of strings. + encoding_format (str, optional): The format for the embedding vector data. + Defaults to None. + user (str, optional): A unique identifier for the end-user. Defaults to None. + + Returns: + EmbeddingResponse: An object containing the generated embeddings. + + Raises: + APIError: If the embeddings creation fails due to an API error. + + Examples: + >>> from opperai import Client + >>> client = Client(api_key="your_api_key_here") + >>> response = client.embeddings.create( + ... model="text-embedding-ada-002", + ... input_text="Hello, world!" + ... ) + >>> print(response.data[0]) + [0.0023064255, -0.009327292, ...] + """ + request = EmbeddingRequest( + model=model, + input=input_text, + encoding_format=encoding_format, + user=user, + ) + + response = self.http_client.do_request( + "POST", + "/v1/embeddings", + json=request.model_dump(exclude_none=True), + ) + + if response.status_code != HTTPStatus.OK: + raise APIError( + f"Failed to create embeddings with status {response.status_code}: {response.text}" + ) + + return EmbeddingResponse.model_validate(response.json()) diff --git a/src/opperai/embeddings/__init__.py b/src/opperai/embeddings/__init__.py new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/src/opperai/embeddings/__init__.py @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/opperai/embeddings/async_embeddings.py b/src/opperai/embeddings/async_embeddings.py new file mode 100644 index 0000000..e7507bb --- /dev/null +++ b/src/opperai/embeddings/async_embeddings.py @@ -0,0 +1,53 @@ +from dataclasses import dataclass +from typing import List, Optional, Union + +from opperai._client import AsyncClient +from opperai.types import EmbeddingResponse + + +@dataclass +class AsyncEmbeddings: + _client: AsyncClient = None + + def __init__(self, client: Optional[AsyncClient] = None): + if client is None: + client = AsyncClient() + + self._client = client + + async def create( + self, + model: str, + input_text: Union[str, List[str]], + encoding_format: Optional[str] = None, + user: Optional[str] = None, + ) -> EmbeddingResponse: + """Create embeddings for the given input text asynchronously. + + Args: + model (str): The ID of the model to use for generating embeddings. + input_text (Union[str, List[str]]): The input text to obtain embeddings for. + This can be a single string or a list of strings. + encoding_format (str, optional): The format for the embedding vector data. + Defaults to None. + user (str, optional): A unique identifier for the end-user. Defaults to None. + + Returns: + EmbeddingResponse: An object containing the generated embeddings. + + Examples: + >>> from opperai import AsyncOpper + >>> client = AsyncOpper() + >>> response = await client.embeddings.create( + ... model="text-embedding-ada-002", + ... input_text="Hello, world!" + ... ) + >>> print(response.data[0]) + [0.0023064255, -0.009327292, ...] + """ + return await self._client.embeddings.create( + model=model, + input_text=input_text, + encoding_format=encoding_format, + user=user, + ) diff --git a/src/opperai/embeddings/embeddings.py b/src/opperai/embeddings/embeddings.py new file mode 100644 index 0000000..c2d346b --- /dev/null +++ b/src/opperai/embeddings/embeddings.py @@ -0,0 +1,53 @@ +from dataclasses import dataclass +from typing import List, Optional, Union + +from opperai._client import Client +from opperai.types import EmbeddingResponse + + +@dataclass +class Embeddings: + _client: Client = None + + def __init__(self, client: Optional[Client] = None): + if client is None: + client = Client() + + self._client = client + + def create( + self, + model: str, + input_text: Union[str, List[str]], + encoding_format: Optional[str] = None, + user: Optional[str] = None, + ) -> EmbeddingResponse: + """Create embeddings for the given input text. + + Args: + model (str): The ID of the model to use for generating embeddings. + input_text (Union[str, List[str]]): The input text to obtain embeddings for. + This can be a single string or a list of strings. + encoding_format (str, optional): The format for the embedding vector data. + Defaults to None. + user (str, optional): A unique identifier for the end-user. Defaults to None. + + Returns: + EmbeddingResponse: An object containing the generated embeddings. + + Examples: + >>> from opperai import Opper + >>> client = Opper() + >>> response = client.embeddings.create( + ... model="text-embedding-ada-002", + ... input_text="Hello, world!" + ... ) + >>> print(response.data[0]) + [0.0023064255, -0.009327292, ...] + """ + return self._client.embeddings.create( + model=model, + input_text=input_text, + encoding_format=encoding_format, + user=user, + ) diff --git a/src/opperai/evaluations/decorator.py b/src/opperai/evaluations/decorator.py new file mode 100644 index 0000000..e42b6f3 --- /dev/null +++ b/src/opperai/evaluations/decorator.py @@ -0,0 +1,61 @@ +from typing import List + +from opperai.evaluations._base import Evaluation +from opperai.types import Metric + + +def evaluator(func=None, **decorator_kwargs): + """Decorator to create an evaluator function. + + The decorated function should return a list of Metric objects. + + Args: + func: The function to decorate + **decorator_kwargs: Default parameters to pass to the function + + Returns: + A function that can be called with parameters to create metrics + """ + if func is None: + # Called with parameters: @evaluator(param=value) + def decorator(f): + def evaluator_func(**kwargs): + # Merge default kwargs with provided kwargs + combined_kwargs = {**decorator_kwargs, **kwargs} + return f(**combined_kwargs) + + # Copy the original function name and docstring + evaluator_func.__name__ = f.__name__ + evaluator_func.__doc__ = f.__doc__ + return evaluator_func + + return decorator + + # Called without parameters: @evaluator + def evaluator_func(**kwargs): + return func(**kwargs) + + # Copy the original function name and docstring + evaluator_func.__name__ = func.__name__ + evaluator_func.__doc__ = func.__doc__ + return evaluator_func + + +async def process_metrics(metric_group: str, metrics: List[Metric]) -> Evaluation: + """Process a list of metrics into an Evaluation. + + Args: + metric_group: Name/group for these metrics + metrics: List of metrics returned by the evaluator + + Returns: + Evaluation result + """ + # Ensure we have a list of metrics + if not isinstance(metrics, list): + metrics = [metrics] + + # Just store the metrics with the group name + eval_metrics = {metric_group: metrics} + + return Evaluation(metrics=eval_metrics) diff --git a/src/opperai/types/__init__.py b/src/opperai/types/__init__.py index ebe9f98..3f531d5 100644 --- a/src/opperai/types/__init__.py +++ b/src/opperai/types/__init__.py @@ -10,6 +10,7 @@ from pydantic import BaseModel, ConfigDict, Field, FilePath, computed_field from .datasets import DatasetEntry +from .embeddings import EmbeddingRequest, EmbeddingResponse from .indexes import Document, DocumentIn, Filter, RetrievalResponse from .spans import SpanMetric from .validators import validate_uuid_xor_path diff --git a/src/opperai/types/embeddings.py b/src/opperai/types/embeddings.py new file mode 100644 index 0000000..5eda70c --- /dev/null +++ b/src/opperai/types/embeddings.py @@ -0,0 +1,21 @@ +from typing import Any, List, Optional, Union + +from pydantic import BaseModel + + +class EmbeddingRequest(BaseModel): + """Request model for embeddings API.""" + + model: str + input: Union[str, List[str]] + encoding_format: Optional[str] = None + user: Optional[str] = None + + +class EmbeddingResponse(BaseModel): + """Response model for embeddings API.""" + + object: str = "list" + model: str + data: List[Any] + usage: dict From 8ed4f5fb2b0196eee43db8238a32c590b14b3d3d Mon Sep 17 00:00:00 2001 From: Johnny Chadda Date: Wed, 23 Apr 2025 16:07:27 +0200 Subject: [PATCH 2/3] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2699731..c3db939 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "opperai" -version = "0.29.0" +version = "0.30.0" description = "Opper Python client" authors = [{ name = "Opper", email = "support@opper.ai" }] requires-python = "~=3.9" From e75ade5d79ace15b1ba8a6b69e5a4368705615fc Mon Sep 17 00:00:00 2001 From: Johnny Chadda Date: Wed, 23 Apr 2025 16:16:17 +0200 Subject: [PATCH 3/3] Fix ruff lint --- src/opperai/core/embeddings/__init__.py | 1 - src/opperai/embeddings/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/opperai/core/embeddings/__init__.py b/src/opperai/core/embeddings/__init__.py index 0519ecb..e69de29 100644 --- a/src/opperai/core/embeddings/__init__.py +++ b/src/opperai/core/embeddings/__init__.py @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/src/opperai/embeddings/__init__.py b/src/opperai/embeddings/__init__.py index 0519ecb..e69de29 100644 --- a/src/opperai/embeddings/__init__.py +++ b/src/opperai/embeddings/__init__.py @@ -1 +0,0 @@ - \ No newline at end of file