From 7e350d4f2e80b9cf517781071ccc373c0d5a5da9 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 6 Feb 2025 07:37:57 +0000
Subject: [PATCH] feat(api): update via SDK Studio
---
.stats.yml | 2 +-
api.md | 41 ++
src/mixedbread/_client.py | 9 +
src/mixedbread/resources/__init__.py | 14 +
.../resources/extractions/__init__.py | 61 +++
.../resources/extractions/content.py | 196 ++++++++++
.../resources/extractions/extractions.py | 166 ++++++++
src/mixedbread/resources/extractions/jobs.py | 286 ++++++++++++++
.../resources/extractions/schema.py | 356 ++++++++++++++++++
src/mixedbread/types/extractions/__init__.py | 14 +
.../extractions/content_create_params.py | 15 +
.../types/extractions/created_json_schema.py | 11 +
.../types/extractions/enhanced_json_schema.py | 11 +
.../types/extractions/extraction_job.py | 13 +
.../types/extractions/extraction_result.py | 11 +
.../types/extractions/job_create_params.py | 15 +
.../types/extractions/schema_create_params.py | 12 +
.../extractions/schema_enhance_params.py | 12 +
.../extractions/schema_validate_params.py | 12 +
.../extractions/validated_json_schema.py | 18 +
tests/api_resources/extractions/__init__.py | 1 +
.../api_resources/extractions/test_content.py | 90 +++++
tests/api_resources/extractions/test_jobs.py | 166 ++++++++
.../api_resources/extractions/test_schema.py | 212 +++++++++++
24 files changed, 1743 insertions(+), 1 deletion(-)
create mode 100644 src/mixedbread/resources/extractions/__init__.py
create mode 100644 src/mixedbread/resources/extractions/content.py
create mode 100644 src/mixedbread/resources/extractions/extractions.py
create mode 100644 src/mixedbread/resources/extractions/jobs.py
create mode 100644 src/mixedbread/resources/extractions/schema.py
create mode 100644 src/mixedbread/types/extractions/__init__.py
create mode 100644 src/mixedbread/types/extractions/content_create_params.py
create mode 100644 src/mixedbread/types/extractions/created_json_schema.py
create mode 100644 src/mixedbread/types/extractions/enhanced_json_schema.py
create mode 100644 src/mixedbread/types/extractions/extraction_job.py
create mode 100644 src/mixedbread/types/extractions/extraction_result.py
create mode 100644 src/mixedbread/types/extractions/job_create_params.py
create mode 100644 src/mixedbread/types/extractions/schema_create_params.py
create mode 100644 src/mixedbread/types/extractions/schema_enhance_params.py
create mode 100644 src/mixedbread/types/extractions/schema_validate_params.py
create mode 100644 src/mixedbread/types/extractions/validated_json_schema.py
create mode 100644 tests/api_resources/extractions/__init__.py
create mode 100644 tests/api_resources/extractions/test_content.py
create mode 100644 tests/api_resources/extractions/test_jobs.py
create mode 100644 tests/api_resources/extractions/test_schema.py
diff --git a/.stats.yml b/.stats.yml
index 87bccfa5..7e00acb0 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
-configured_endpoints: 24
+configured_endpoints: 30
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-8d823477cb6d9c193492b79d7a52280e76535d9171df279807e4c145c29737e6.yml
diff --git a/api.md b/api.md
index c19bfce7..be50ff97 100644
--- a/api.md
+++ b/api.md
@@ -98,3 +98,44 @@ Methods:
- client.vector_stores.files.list(vector_store_id, \*\*params) -> SyncLimitOffset[VectorStoreFile]
- client.vector_stores.files.delete(file_id, \*, vector_store_id) -> FileDeleteResponse
- client.vector_stores.files.search(\*\*params) -> FileSearchResponse
+
+# Extractions
+
+## Jobs
+
+Types:
+
+```python
+from mixedbread.types.extractions import ExtractionJob
+```
+
+Methods:
+
+- client.extractions.jobs.create(\*\*params) -> ExtractionJob
+- client.extractions.jobs.retrieve(job_id) -> ExtractionJob
+
+## Schema
+
+Types:
+
+```python
+from mixedbread.types.extractions import CreatedJsonSchema, EnhancedJsonSchema, ValidatedJsonSchema
+```
+
+Methods:
+
+- client.extractions.schema.create(\*\*params) -> CreatedJsonSchema
+- client.extractions.schema.enhance(\*\*params) -> EnhancedJsonSchema
+- client.extractions.schema.validate(\*\*params) -> ValidatedJsonSchema
+
+## Content
+
+Types:
+
+```python
+from mixedbread.types.extractions import ExtractionResult
+```
+
+Methods:
+
+- client.extractions.content.create(\*\*params) -> ExtractionResult
diff --git a/src/mixedbread/_client.py b/src/mixedbread/_client.py
index 9358b2b4..9b142105 100644
--- a/src/mixedbread/_client.py
+++ b/src/mixedbread/_client.py
@@ -44,6 +44,7 @@
)
from .resources.parsing import parsing
from .types.info_response import InfoResponse
+from .resources.extractions import extractions
from .resources.vector_stores import vector_stores
__all__ = [
@@ -68,6 +69,7 @@ class Mixedbread(SyncAPIClient):
parsing: parsing.ParsingResource
files: files.FilesResource
vector_stores: vector_stores.VectorStoresResource
+ extractions: extractions.ExtractionsResource
with_raw_response: MixedbreadWithRawResponse
with_streaming_response: MixedbreadWithStreamedResponse
@@ -152,6 +154,7 @@ def __init__(
self.parsing = parsing.ParsingResource(self)
self.files = files.FilesResource(self)
self.vector_stores = vector_stores.VectorStoresResource(self)
+ self.extractions = extractions.ExtractionsResource(self)
self.with_raw_response = MixedbreadWithRawResponse(self)
self.with_streaming_response = MixedbreadWithStreamedResponse(self)
@@ -289,6 +292,7 @@ class AsyncMixedbread(AsyncAPIClient):
parsing: parsing.AsyncParsingResource
files: files.AsyncFilesResource
vector_stores: vector_stores.AsyncVectorStoresResource
+ extractions: extractions.AsyncExtractionsResource
with_raw_response: AsyncMixedbreadWithRawResponse
with_streaming_response: AsyncMixedbreadWithStreamedResponse
@@ -373,6 +377,7 @@ def __init__(
self.parsing = parsing.AsyncParsingResource(self)
self.files = files.AsyncFilesResource(self)
self.vector_stores = vector_stores.AsyncVectorStoresResource(self)
+ self.extractions = extractions.AsyncExtractionsResource(self)
self.with_raw_response = AsyncMixedbreadWithRawResponse(self)
self.with_streaming_response = AsyncMixedbreadWithStreamedResponse(self)
@@ -511,6 +516,7 @@ def __init__(self, client: Mixedbread) -> None:
self.parsing = parsing.ParsingResourceWithRawResponse(client.parsing)
self.files = files.FilesResourceWithRawResponse(client.files)
self.vector_stores = vector_stores.VectorStoresResourceWithRawResponse(client.vector_stores)
+ self.extractions = extractions.ExtractionsResourceWithRawResponse(client.extractions)
self.info = to_raw_response_wrapper(
client.info,
@@ -522,6 +528,7 @@ def __init__(self, client: AsyncMixedbread) -> None:
self.parsing = parsing.AsyncParsingResourceWithRawResponse(client.parsing)
self.files = files.AsyncFilesResourceWithRawResponse(client.files)
self.vector_stores = vector_stores.AsyncVectorStoresResourceWithRawResponse(client.vector_stores)
+ self.extractions = extractions.AsyncExtractionsResourceWithRawResponse(client.extractions)
self.info = async_to_raw_response_wrapper(
client.info,
@@ -533,6 +540,7 @@ def __init__(self, client: Mixedbread) -> None:
self.parsing = parsing.ParsingResourceWithStreamingResponse(client.parsing)
self.files = files.FilesResourceWithStreamingResponse(client.files)
self.vector_stores = vector_stores.VectorStoresResourceWithStreamingResponse(client.vector_stores)
+ self.extractions = extractions.ExtractionsResourceWithStreamingResponse(client.extractions)
self.info = to_streamed_response_wrapper(
client.info,
@@ -544,6 +552,7 @@ def __init__(self, client: AsyncMixedbread) -> None:
self.parsing = parsing.AsyncParsingResourceWithStreamingResponse(client.parsing)
self.files = files.AsyncFilesResourceWithStreamingResponse(client.files)
self.vector_stores = vector_stores.AsyncVectorStoresResourceWithStreamingResponse(client.vector_stores)
+ self.extractions = extractions.AsyncExtractionsResourceWithStreamingResponse(client.extractions)
self.info = async_to_streamed_response_wrapper(
client.info,
diff --git a/src/mixedbread/resources/__init__.py b/src/mixedbread/resources/__init__.py
index d1fe9468..3aac6ed5 100644
--- a/src/mixedbread/resources/__init__.py
+++ b/src/mixedbread/resources/__init__.py
@@ -16,6 +16,14 @@
ParsingResourceWithStreamingResponse,
AsyncParsingResourceWithStreamingResponse,
)
+from .extractions import (
+ ExtractionsResource,
+ AsyncExtractionsResource,
+ ExtractionsResourceWithRawResponse,
+ AsyncExtractionsResourceWithRawResponse,
+ ExtractionsResourceWithStreamingResponse,
+ AsyncExtractionsResourceWithStreamingResponse,
+)
from .vector_stores import (
VectorStoresResource,
AsyncVectorStoresResource,
@@ -44,4 +52,10 @@
"AsyncVectorStoresResourceWithRawResponse",
"VectorStoresResourceWithStreamingResponse",
"AsyncVectorStoresResourceWithStreamingResponse",
+ "ExtractionsResource",
+ "AsyncExtractionsResource",
+ "ExtractionsResourceWithRawResponse",
+ "AsyncExtractionsResourceWithRawResponse",
+ "ExtractionsResourceWithStreamingResponse",
+ "AsyncExtractionsResourceWithStreamingResponse",
]
diff --git a/src/mixedbread/resources/extractions/__init__.py b/src/mixedbread/resources/extractions/__init__.py
new file mode 100644
index 00000000..d9a42002
--- /dev/null
+++ b/src/mixedbread/resources/extractions/__init__.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .jobs import (
+ JobsResource,
+ AsyncJobsResource,
+ JobsResourceWithRawResponse,
+ AsyncJobsResourceWithRawResponse,
+ JobsResourceWithStreamingResponse,
+ AsyncJobsResourceWithStreamingResponse,
+)
+from .schema import (
+ SchemaResource,
+ AsyncSchemaResource,
+ SchemaResourceWithRawResponse,
+ AsyncSchemaResourceWithRawResponse,
+ SchemaResourceWithStreamingResponse,
+ AsyncSchemaResourceWithStreamingResponse,
+)
+from .content import (
+ ContentResource,
+ AsyncContentResource,
+ ContentResourceWithRawResponse,
+ AsyncContentResourceWithRawResponse,
+ ContentResourceWithStreamingResponse,
+ AsyncContentResourceWithStreamingResponse,
+)
+from .extractions import (
+ ExtractionsResource,
+ AsyncExtractionsResource,
+ ExtractionsResourceWithRawResponse,
+ AsyncExtractionsResourceWithRawResponse,
+ ExtractionsResourceWithStreamingResponse,
+ AsyncExtractionsResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "JobsResource",
+ "AsyncJobsResource",
+ "JobsResourceWithRawResponse",
+ "AsyncJobsResourceWithRawResponse",
+ "JobsResourceWithStreamingResponse",
+ "AsyncJobsResourceWithStreamingResponse",
+ "SchemaResource",
+ "AsyncSchemaResource",
+ "SchemaResourceWithRawResponse",
+ "AsyncSchemaResourceWithRawResponse",
+ "SchemaResourceWithStreamingResponse",
+ "AsyncSchemaResourceWithStreamingResponse",
+ "ContentResource",
+ "AsyncContentResource",
+ "ContentResourceWithRawResponse",
+ "AsyncContentResourceWithRawResponse",
+ "ContentResourceWithStreamingResponse",
+ "AsyncContentResourceWithStreamingResponse",
+ "ExtractionsResource",
+ "AsyncExtractionsResource",
+ "ExtractionsResourceWithRawResponse",
+ "AsyncExtractionsResourceWithRawResponse",
+ "ExtractionsResourceWithStreamingResponse",
+ "AsyncExtractionsResourceWithStreamingResponse",
+]
diff --git a/src/mixedbread/resources/extractions/content.py b/src/mixedbread/resources/extractions/content.py
new file mode 100644
index 00000000..cacd55a6
--- /dev/null
+++ b/src/mixedbread/resources/extractions/content.py
@@ -0,0 +1,196 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.extractions import content_create_params
+from ...types.extractions.extraction_result import ExtractionResult
+
+__all__ = ["ContentResource", "AsyncContentResource"]
+
+
+class ContentResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> ContentResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+ """
+ return ContentResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ContentResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+ """
+ return ContentResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ content: str,
+ json_schema: object,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExtractionResult:
+ """
+ Extract content from a string using the provided schema.
+
+ The `content` and `json_schema` arguments are sent as the request body.
+
+ Returns: The extracted content.
+
+ Args:
+ content: The content to extract from
+
+ json_schema: The JSON schema to use for extraction
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1/extractions/content",
+ body=maybe_transform(
+ {
+ "content": content,
+ "json_schema": json_schema,
+ },
+ content_create_params.ContentCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ExtractionResult,
+ )
+
+
+class AsyncContentResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncContentResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncContentResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncContentResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+ """
+ return AsyncContentResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ content: str,
+ json_schema: object,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExtractionResult:
+ """
+ Extract content from a string using the provided schema.
+
+ The `content` and `json_schema` arguments are sent as the request body.
+
+ Returns: The extracted content.
+
+ Args:
+ content: The content to extract from
+
+ json_schema: The JSON schema to use for extraction
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1/extractions/content",
+ body=await async_maybe_transform(
+ {
+ "content": content,
+ "json_schema": json_schema,
+ },
+ content_create_params.ContentCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ExtractionResult,
+ )
+
+
+class ContentResourceWithRawResponse:
+ def __init__(self, content: ContentResource) -> None:
+ self._content = content
+
+ self.create = to_raw_response_wrapper(
+ content.create,
+ )
+
+
+class AsyncContentResourceWithRawResponse:
+ def __init__(self, content: AsyncContentResource) -> None:
+ self._content = content
+
+ self.create = async_to_raw_response_wrapper(
+ content.create,
+ )
+
+
+class ContentResourceWithStreamingResponse:
+ def __init__(self, content: ContentResource) -> None:
+ self._content = content
+
+ self.create = to_streamed_response_wrapper(
+ content.create,
+ )
+
+
+class AsyncContentResourceWithStreamingResponse:
+ def __init__(self, content: AsyncContentResource) -> None:
+ self._content = content
+
+ self.create = async_to_streamed_response_wrapper(
+ content.create,
+ )
diff --git a/src/mixedbread/resources/extractions/extractions.py b/src/mixedbread/resources/extractions/extractions.py
new file mode 100644
index 00000000..eebb3e40
--- /dev/null
+++ b/src/mixedbread/resources/extractions/extractions.py
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .jobs import (
+ JobsResource,
+ AsyncJobsResource,
+ JobsResourceWithRawResponse,
+ AsyncJobsResourceWithRawResponse,
+ JobsResourceWithStreamingResponse,
+ AsyncJobsResourceWithStreamingResponse,
+)
+from .schema import (
+ SchemaResource,
+ AsyncSchemaResource,
+ SchemaResourceWithRawResponse,
+ AsyncSchemaResourceWithRawResponse,
+ SchemaResourceWithStreamingResponse,
+ AsyncSchemaResourceWithStreamingResponse,
+)
+from .content import (
+ ContentResource,
+ AsyncContentResource,
+ ContentResourceWithRawResponse,
+ AsyncContentResourceWithRawResponse,
+ ContentResourceWithStreamingResponse,
+ AsyncContentResourceWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["ExtractionsResource", "AsyncExtractionsResource"]
+
+
+class ExtractionsResource(SyncAPIResource):
+ @cached_property
+ def jobs(self) -> JobsResource:
+ return JobsResource(self._client)
+
+ @cached_property
+ def schema(self) -> SchemaResource:
+ return SchemaResource(self._client)
+
+ @cached_property
+ def content(self) -> ContentResource:
+ return ContentResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> ExtractionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+ """
+ return ExtractionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> ExtractionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+ """
+ return ExtractionsResourceWithStreamingResponse(self)
+
+
+class AsyncExtractionsResource(AsyncAPIResource):
+ @cached_property
+ def jobs(self) -> AsyncJobsResource:
+ return AsyncJobsResource(self._client)
+
+ @cached_property
+ def schema(self) -> AsyncSchemaResource:
+ return AsyncSchemaResource(self._client)
+
+ @cached_property
+ def content(self) -> AsyncContentResource:
+ return AsyncContentResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncExtractionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncExtractionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncExtractionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+ """
+ return AsyncExtractionsResourceWithStreamingResponse(self)
+
+
+class ExtractionsResourceWithRawResponse:
+ def __init__(self, extractions: ExtractionsResource) -> None:
+ self._extractions = extractions
+
+ @cached_property
+ def jobs(self) -> JobsResourceWithRawResponse:
+ return JobsResourceWithRawResponse(self._extractions.jobs)
+
+ @cached_property
+ def schema(self) -> SchemaResourceWithRawResponse:
+ return SchemaResourceWithRawResponse(self._extractions.schema)
+
+ @cached_property
+ def content(self) -> ContentResourceWithRawResponse:
+ return ContentResourceWithRawResponse(self._extractions.content)
+
+
+class AsyncExtractionsResourceWithRawResponse:
+ def __init__(self, extractions: AsyncExtractionsResource) -> None:
+ self._extractions = extractions
+
+ @cached_property
+ def jobs(self) -> AsyncJobsResourceWithRawResponse:
+ return AsyncJobsResourceWithRawResponse(self._extractions.jobs)
+
+ @cached_property
+ def schema(self) -> AsyncSchemaResourceWithRawResponse:
+ return AsyncSchemaResourceWithRawResponse(self._extractions.schema)
+
+ @cached_property
+ def content(self) -> AsyncContentResourceWithRawResponse:
+ return AsyncContentResourceWithRawResponse(self._extractions.content)
+
+
+class ExtractionsResourceWithStreamingResponse:
+ def __init__(self, extractions: ExtractionsResource) -> None:
+ self._extractions = extractions
+
+ @cached_property
+ def jobs(self) -> JobsResourceWithStreamingResponse:
+ return JobsResourceWithStreamingResponse(self._extractions.jobs)
+
+ @cached_property
+ def schema(self) -> SchemaResourceWithStreamingResponse:
+ return SchemaResourceWithStreamingResponse(self._extractions.schema)
+
+ @cached_property
+ def content(self) -> ContentResourceWithStreamingResponse:
+ return ContentResourceWithStreamingResponse(self._extractions.content)
+
+
+class AsyncExtractionsResourceWithStreamingResponse:
+ def __init__(self, extractions: AsyncExtractionsResource) -> None:
+ self._extractions = extractions
+
+ @cached_property
+ def jobs(self) -> AsyncJobsResourceWithStreamingResponse:
+ return AsyncJobsResourceWithStreamingResponse(self._extractions.jobs)
+
+ @cached_property
+ def schema(self) -> AsyncSchemaResourceWithStreamingResponse:
+ return AsyncSchemaResourceWithStreamingResponse(self._extractions.schema)
+
+ @cached_property
+ def content(self) -> AsyncContentResourceWithStreamingResponse:
+ return AsyncContentResourceWithStreamingResponse(self._extractions.content)
diff --git a/src/mixedbread/resources/extractions/jobs.py b/src/mixedbread/resources/extractions/jobs.py
new file mode 100644
index 00000000..3abc64f9
--- /dev/null
+++ b/src/mixedbread/resources/extractions/jobs.py
@@ -0,0 +1,286 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.extractions import job_create_params
+from ...types.extractions.extraction_job import ExtractionJob
+
+__all__ = ["JobsResource", "AsyncJobsResource"]
+
+
+class JobsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> JobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+ """
+ return JobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> JobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+ """
+ return JobsResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ file_id: str,
+ json_schema: object,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExtractionJob:
+ """
+ Start an extraction job for the provided file and schema.
+
+ The `file_id` and `json_schema` arguments are sent as the request body.
+
+ Returns: The created extraction job.
+
+ Args:
+ file_id: The ID of the file to extract from
+
+ json_schema: The JSON schema to use for extraction
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1/extractions/jobs",
+ body=maybe_transform(
+ {
+ "file_id": file_id,
+ "json_schema": json_schema,
+ },
+ job_create_params.JobCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ExtractionJob,
+ )
+
+ def retrieve(
+ self,
+ job_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExtractionJob:
+ """
+ Get detailed information about a specific extraction job.
+
+ The job is looked up by `job_id`; an empty `job_id` raises `ValueError`.
+
+ Returns: Detailed information about the extraction job.
+
+ Args:
+ job_id: The ID of the extraction job to retrieve
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return self._get(
+ f"/v1/extractions/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ExtractionJob,
+ )
+
+
+class AsyncJobsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncJobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncJobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+ """
+ return AsyncJobsResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ file_id: str,
+ json_schema: object,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExtractionJob:
+ """
+ Start an extraction job for the provided file and schema.
+
+ The `file_id` and `json_schema` arguments are sent as the request body.
+
+ Returns: The created extraction job.
+
+ Args:
+ file_id: The ID of the file to extract from
+
+ json_schema: The JSON schema to use for extraction
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1/extractions/jobs",
+ body=await async_maybe_transform(
+ {
+ "file_id": file_id,
+ "json_schema": json_schema,
+ },
+ job_create_params.JobCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ExtractionJob,
+ )
+
+ async def retrieve(
+ self,
+ job_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExtractionJob:
+ """
+ Get detailed information about a specific extraction job.
+
+ The job is looked up by `job_id`; an empty `job_id` raises `ValueError`.
+
+ Returns: Detailed information about the extraction job.
+
+ Args:
+ job_id: The ID of the extraction job to retrieve
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return await self._get(
+ f"/v1/extractions/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ExtractionJob,
+ )
+
+
+class JobsResourceWithRawResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.create = to_raw_response_wrapper(
+ jobs.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+
+
+class AsyncJobsResourceWithRawResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.create = async_to_raw_response_wrapper(
+ jobs.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+
+
+class JobsResourceWithStreamingResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.create = to_streamed_response_wrapper(
+ jobs.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
+
+
+class AsyncJobsResourceWithStreamingResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.create = async_to_streamed_response_wrapper(
+ jobs.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
diff --git a/src/mixedbread/resources/extractions/schema.py b/src/mixedbread/resources/extractions/schema.py
new file mode 100644
index 00000000..60ee00a5
--- /dev/null
+++ b/src/mixedbread/resources/extractions/schema.py
@@ -0,0 +1,356 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import (
+ maybe_transform,
+ async_maybe_transform,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.extractions import schema_create_params, schema_enhance_params, schema_validate_params
+from ...types.extractions.created_json_schema import CreatedJsonSchema
+from ...types.extractions.enhanced_json_schema import EnhancedJsonSchema
+from ...types.extractions.validated_json_schema import ValidatedJsonSchema
+
+__all__ = ["SchemaResource", "AsyncSchemaResource"]
+
+
+class SchemaResource(SyncAPIResource):  # synchronous access to the /v1/extractions/schema* endpoints
+    @cached_property
+    def with_raw_response(self) -> SchemaResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+        """
+        return SchemaResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> SchemaResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+        """
+        return SchemaResourceWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        description: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreatedJsonSchema:
+        """
+        Create a schema with the provided parameters.
+
+        Sends `{"description": description}` through `self._post` to `/v1/extractions/schema`.
+
+        Returns: The created schema as a `CreatedJsonSchema`.
+
+        Args:
+          description: Description of the data to extract
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/v1/extractions/schema",
+            body=maybe_transform({"description": description}, schema_create_params.SchemaCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=CreatedJsonSchema,
+        )
+
+    def enhance(
+        self,
+        *,
+        json_schema: object,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> EnhancedJsonSchema:
+        """
+        Enhance a schema by enriching the descriptions to aid extraction.
+
+        Sends `{"json_schema": json_schema}` through `self._post` to `/v1/extractions/schema/enhance`.
+
+        Returns: The enhanced schema as an `EnhancedJsonSchema`.
+
+        Args:
+          json_schema: The JSON schema to enhance
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/v1/extractions/schema/enhance",
+            body=maybe_transform({"json_schema": json_schema}, schema_enhance_params.SchemaEnhanceParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=EnhancedJsonSchema,
+        )
+
+    def validate(
+        self,
+        *,
+        json_schema: object,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ValidatedJsonSchema:
+        """
+        Validate a schema.
+
+        Sends `{"json_schema": json_schema}` through `self._post` to `/v1/extractions/schema/validate`.
+
+        Returns: The validation result as a `ValidatedJsonSchema`.
+
+        Args:
+          json_schema: The JSON schema to validate
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/v1/extractions/schema/validate",
+            body=maybe_transform({"json_schema": json_schema}, schema_validate_params.SchemaValidateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ValidatedJsonSchema,
+        )
+
+
+class AsyncSchemaResource(AsyncAPIResource):  # asynchronous access to the /v1/extractions/schema* endpoints
+    @cached_property
+    def with_raw_response(self) -> AsyncSchemaResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncSchemaResourceWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncSchemaResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response
+        """
+        return AsyncSchemaResourceWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        description: str,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreatedJsonSchema:
+        """
+        Create a schema with the provided parameters.
+
+        Sends `{"description": description}` through `self._post` to `/v1/extractions/schema`.
+
+        Returns: The created schema as a `CreatedJsonSchema`.
+
+        Args:
+          description: Description of the data to extract
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/v1/extractions/schema",
+            body=await async_maybe_transform({"description": description}, schema_create_params.SchemaCreateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=CreatedJsonSchema,
+        )
+
+    async def enhance(
+        self,
+        *,
+        json_schema: object,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> EnhancedJsonSchema:
+        """
+        Enhance a schema by enriching the descriptions to aid extraction.
+
+        Sends `{"json_schema": json_schema}` through `self._post` to `/v1/extractions/schema/enhance`.
+
+        Returns: The enhanced schema as an `EnhancedJsonSchema`.
+
+        Args:
+          json_schema: The JSON schema to enhance
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/v1/extractions/schema/enhance",
+            body=await async_maybe_transform({"json_schema": json_schema}, schema_enhance_params.SchemaEnhanceParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=EnhancedJsonSchema,
+        )
+
+    async def validate(
+        self,
+        *,
+        json_schema: object,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ValidatedJsonSchema:
+        """
+        Validate a schema.
+
+        Sends `{"json_schema": json_schema}` through `self._post` to `/v1/extractions/schema/validate`.
+
+        Returns: The validation result as a `ValidatedJsonSchema`.
+
+        Args:
+          json_schema: The JSON schema to validate
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/v1/extractions/schema/validate",
+            body=await async_maybe_transform({"json_schema": json_schema}, schema_validate_params.SchemaValidateParams),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ValidatedJsonSchema,
+        )
+
+
+class SchemaResourceWithRawResponse:
+    def __init__(self, schema: SchemaResource) -> None:
+        self._schema = schema
+        # Expose the same endpoints as `schema`, each passed through `to_raw_response_wrapper`.
+        self.create = to_raw_response_wrapper(
+            schema.create,
+        )
+        self.enhance = to_raw_response_wrapper(
+            schema.enhance,
+        )
+        self.validate = to_raw_response_wrapper(
+            schema.validate,
+        )
+
+
+class AsyncSchemaResourceWithRawResponse:
+    def __init__(self, schema: AsyncSchemaResource) -> None:
+        self._schema = schema
+        # Expose the same endpoints as `schema`, each passed through `async_to_raw_response_wrapper`.
+        self.create = async_to_raw_response_wrapper(
+            schema.create,
+        )
+        self.enhance = async_to_raw_response_wrapper(
+            schema.enhance,
+        )
+        self.validate = async_to_raw_response_wrapper(
+            schema.validate,
+        )
+
+
+class SchemaResourceWithStreamingResponse:
+    def __init__(self, schema: SchemaResource) -> None:
+        self._schema = schema
+        # Expose the same endpoints as `schema`, each passed through `to_streamed_response_wrapper`.
+        self.create = to_streamed_response_wrapper(
+            schema.create,
+        )
+        self.enhance = to_streamed_response_wrapper(
+            schema.enhance,
+        )
+        self.validate = to_streamed_response_wrapper(
+            schema.validate,
+        )
+
+
+class AsyncSchemaResourceWithStreamingResponse:
+    def __init__(self, schema: AsyncSchemaResource) -> None:
+        self._schema = schema
+        # Expose the same endpoints as `schema`, each passed through `async_to_streamed_response_wrapper`.
+        self.create = async_to_streamed_response_wrapper(
+            schema.create,
+        )
+        self.enhance = async_to_streamed_response_wrapper(
+            schema.enhance,
+        )
+        self.validate = async_to_streamed_response_wrapper(
+            schema.validate,
+        )
diff --git a/src/mixedbread/types/extractions/__init__.py b/src/mixedbread/types/extractions/__init__.py
new file mode 100644
index 00000000..6457c670
--- /dev/null
+++ b/src/mixedbread/types/extractions/__init__.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .extraction_job import ExtractionJob as ExtractionJob
+from .extraction_result import ExtractionResult as ExtractionResult
+from .job_create_params import JobCreateParams as JobCreateParams
+from .created_json_schema import CreatedJsonSchema as CreatedJsonSchema
+from .enhanced_json_schema import EnhancedJsonSchema as EnhancedJsonSchema
+from .schema_create_params import SchemaCreateParams as SchemaCreateParams
+from .content_create_params import ContentCreateParams as ContentCreateParams
+from .schema_enhance_params import SchemaEnhanceParams as SchemaEnhanceParams
+from .validated_json_schema import ValidatedJsonSchema as ValidatedJsonSchema
+from .schema_validate_params import SchemaValidateParams as SchemaValidateParams
diff --git a/src/mixedbread/types/extractions/content_create_params.py b/src/mixedbread/types/extractions/content_create_params.py
new file mode 100644
index 00000000..2bbdada2
--- /dev/null
+++ b/src/mixedbread/types/extractions/content_create_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["ContentCreateParams"]
+
+
+class ContentCreateParams(TypedDict, total=False):  # both keys are Required[...], so neither may be omitted
+    content: Required[str]
+    """The content to extract from"""
+
+    json_schema: Required[object]
+    """The JSON schema to use for extraction"""
diff --git a/src/mixedbread/types/extractions/created_json_schema.py b/src/mixedbread/types/extractions/created_json_schema.py
new file mode 100644
index 00000000..4b50c4c6
--- /dev/null
+++ b/src/mixedbread/types/extractions/created_json_schema.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["CreatedJsonSchema"]
+
+
+class CreatedJsonSchema(BaseModel):  # parsed response of the schema `create` methods
+    json_schema: object
+    """The created JSON schema"""
diff --git a/src/mixedbread/types/extractions/enhanced_json_schema.py b/src/mixedbread/types/extractions/enhanced_json_schema.py
new file mode 100644
index 00000000..7b2ab04a
--- /dev/null
+++ b/src/mixedbread/types/extractions/enhanced_json_schema.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["EnhancedJsonSchema"]
+
+
+class EnhancedJsonSchema(BaseModel):  # parsed response of the schema `enhance` methods
+    json_schema: object
+    """The enhanced JSON schema"""
diff --git a/src/mixedbread/types/extractions/extraction_job.py b/src/mixedbread/types/extractions/extraction_job.py
new file mode 100644
index 00000000..5d8a1604
--- /dev/null
+++ b/src/mixedbread/types/extractions/extraction_job.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+from .extraction_result import ExtractionResult
+
+__all__ = ["ExtractionJob"]
+
+
+class ExtractionJob(BaseModel):  # parsed response of the extraction jobs `create` and `retrieve` calls
+    result: Optional[ExtractionResult] = None  # may be absent; defaults to None
+    """Result of an extraction operation."""
diff --git a/src/mixedbread/types/extractions/extraction_result.py b/src/mixedbread/types/extractions/extraction_result.py
new file mode 100644
index 00000000..fbac5d5f
--- /dev/null
+++ b/src/mixedbread/types/extractions/extraction_result.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+
+from ..._models import BaseModel
+
+__all__ = ["ExtractionResult"]
+
+
+class ExtractionResult(BaseModel):  # returned by `extractions.content.create`; also nested in ExtractionJob.result
+    data: object
+    """The extracted data"""
diff --git a/src/mixedbread/types/extractions/job_create_params.py b/src/mixedbread/types/extractions/job_create_params.py
new file mode 100644
index 00000000..476154b2
--- /dev/null
+++ b/src/mixedbread/types/extractions/job_create_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["JobCreateParams"]
+
+
+class JobCreateParams(TypedDict, total=False):  # both keys are Required[...], so neither may be omitted
+    file_id: Required[str]
+    """The ID of the file to extract from"""
+
+    json_schema: Required[object]
+    """The JSON schema to use for extraction"""
diff --git a/src/mixedbread/types/extractions/schema_create_params.py b/src/mixedbread/types/extractions/schema_create_params.py
new file mode 100644
index 00000000..8625ea66
--- /dev/null
+++ b/src/mixedbread/types/extractions/schema_create_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["SchemaCreateParams"]
+
+
+class SchemaCreateParams(TypedDict, total=False):  # request-body shape used by the schema `create` methods
+    description: Required[str]
+    """Description of the data to extract"""
diff --git a/src/mixedbread/types/extractions/schema_enhance_params.py b/src/mixedbread/types/extractions/schema_enhance_params.py
new file mode 100644
index 00000000..e0facf34
--- /dev/null
+++ b/src/mixedbread/types/extractions/schema_enhance_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["SchemaEnhanceParams"]
+
+
+class SchemaEnhanceParams(TypedDict, total=False):  # request-body shape used by the schema `enhance` methods
+    json_schema: Required[object]
+    """The JSON schema to enhance"""
diff --git a/src/mixedbread/types/extractions/schema_validate_params.py b/src/mixedbread/types/extractions/schema_validate_params.py
new file mode 100644
index 00000000..947182ca
--- /dev/null
+++ b/src/mixedbread/types/extractions/schema_validate_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["SchemaValidateParams"]
+
+
+class SchemaValidateParams(TypedDict, total=False):  # request-body shape used by the schema `validate` methods
+    json_schema: Required[object]
+    """The JSON schema to validate"""
diff --git a/src/mixedbread/types/extractions/validated_json_schema.py b/src/mixedbread/types/extractions/validated_json_schema.py
new file mode 100644
index 00000000..cb021c31
--- /dev/null
+++ b/src/mixedbread/types/extractions/validated_json_schema.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+
+from ..._models import BaseModel
+
+__all__ = ["ValidatedJsonSchema"]
+
+
+class ValidatedJsonSchema(BaseModel):  # parsed response of the schema `validate` methods
+    is_valid: bool
+    """Whether the schema is valid"""
+
+    errors: List[str]
+    """List of validation errors"""
+
+    json_schema: object
+    """The validated JSON schema"""
diff --git a/tests/api_resources/extractions/__init__.py b/tests/api_resources/extractions/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/extractions/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/extractions/test_content.py b/tests/api_resources/extractions/test_content.py
new file mode 100644
index 00000000..7beb468b
--- /dev/null
+++ b/tests/api_resources/extractions/test_content.py
@@ -0,0 +1,90 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from mixedbread import Mixedbread, AsyncMixedbread
+from tests.utils import assert_matches_type
+from mixedbread.types.extractions import ExtractionResult
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestContent:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+    # Each test below runs twice through the indirect `client` fixture: "loose" (False) and "strict" (True).
+    @parametrize
+    def test_method_create(self, client: Mixedbread) -> None:
+        content = client.extractions.content.create(
+            content="content",
+            json_schema={},
+        )
+        assert_matches_type(ExtractionResult, content, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: Mixedbread) -> None:
+        response = client.extractions.content.with_raw_response.create(
+            content="content",
+            json_schema={},
+        )
+        # The raw response is expected to be closed already when the call returns.
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        content = response.parse()
+        assert_matches_type(ExtractionResult, content, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: Mixedbread) -> None:
+        with client.extractions.content.with_streaming_response.create(
+            content="content",
+            json_schema={},
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+            # Parse while the streamed response is still open.
+            content = response.parse()
+            assert_matches_type(ExtractionResult, content, path=["response"])
+        # Leaving the `with` block must close the response; cast(Any, ...) presumably avoids type narrowing.
+        assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncContent:
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+    # Each test below runs twice through the indirect `async_client` fixture: "loose" (False) and "strict" (True).
+    @parametrize
+    async def test_method_create(self, async_client: AsyncMixedbread) -> None:
+        content = await async_client.extractions.content.create(
+            content="content",
+            json_schema={},
+        )
+        assert_matches_type(ExtractionResult, content, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncMixedbread) -> None:
+        response = await async_client.extractions.content.with_raw_response.create(
+            content="content",
+            json_schema={},
+        )
+        # The raw response is expected to be closed already when the call returns.
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        content = await response.parse()
+        assert_matches_type(ExtractionResult, content, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncMixedbread) -> None:
+        async with async_client.extractions.content.with_streaming_response.create(
+            content="content",
+            json_schema={},
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+            # Parse while the streamed response is still open.
+            content = await response.parse()
+            assert_matches_type(ExtractionResult, content, path=["response"])
+        # Leaving the `async with` block must close the response; cast(Any, ...) presumably avoids type narrowing.
+        assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/extractions/test_jobs.py b/tests/api_resources/extractions/test_jobs.py
new file mode 100644
index 00000000..bb055ba7
--- /dev/null
+++ b/tests/api_resources/extractions/test_jobs.py
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from mixedbread import Mixedbread, AsyncMixedbread
+from tests.utils import assert_matches_type
+from mixedbread.types.extractions import ExtractionJob
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestJobs:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+    # Each test below runs twice through the indirect `client` fixture: "loose" (False) and "strict" (True).
+    @parametrize
+    def test_method_create(self, client: Mixedbread) -> None:
+        job = client.extractions.jobs.create(
+            file_id="file_id",
+            json_schema={},
+        )
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: Mixedbread) -> None:
+        response = client.extractions.jobs.with_raw_response.create(
+            file_id="file_id",
+            json_schema={},
+        )
+        # The raw response is expected to be closed already when the call returns.
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        job = response.parse()
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: Mixedbread) -> None:
+        with client.extractions.jobs.with_streaming_response.create(
+            file_id="file_id",
+            json_schema={},
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+            # Parse while the streamed response is still open.
+            job = response.parse()
+            assert_matches_type(ExtractionJob, job, path=["response"])
+        # Leaving the `with` block must close the response; cast(Any, ...) presumably avoids type narrowing.
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_method_retrieve(self, client: Mixedbread) -> None:
+        job = client.extractions.jobs.retrieve(
+            "job_id",
+        )
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    def test_raw_response_retrieve(self, client: Mixedbread) -> None:
+        response = client.extractions.jobs.with_raw_response.retrieve(
+            "job_id",
+        )
+        # The raw response is expected to be closed already when the call returns.
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        job = response.parse()
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    def test_streaming_response_retrieve(self, client: Mixedbread) -> None:
+        with client.extractions.jobs.with_streaming_response.retrieve(
+            "job_id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+            # Parse while the streamed response is still open.
+            job = response.parse()
+            assert_matches_type(ExtractionJob, job, path=["response"])
+        # Leaving the `with` block must close the response; cast(Any, ...) presumably avoids type narrowing.
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_retrieve(self, client: Mixedbread) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+            client.extractions.jobs.with_raw_response.retrieve(
+                "",  # an empty job_id must raise ValueError
+            )
+
+
+class TestAsyncJobs:
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+    # Each test below runs twice through the indirect `async_client` fixture: "loose" (False) and "strict" (True).
+    @parametrize
+    async def test_method_create(self, async_client: AsyncMixedbread) -> None:
+        job = await async_client.extractions.jobs.create(
+            file_id="file_id",
+            json_schema={},
+        )
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncMixedbread) -> None:
+        response = await async_client.extractions.jobs.with_raw_response.create(
+            file_id="file_id",
+            json_schema={},
+        )
+        # The raw response is expected to be closed already when the call returns.
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        job = await response.parse()
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncMixedbread) -> None:
+        async with async_client.extractions.jobs.with_streaming_response.create(
+            file_id="file_id",
+            json_schema={},
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+            # Parse while the streamed response is still open.
+            job = await response.parse()
+            assert_matches_type(ExtractionJob, job, path=["response"])
+        # Leaving the `async with` block must close the response; cast(Any, ...) presumably avoids type narrowing.
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_method_retrieve(self, async_client: AsyncMixedbread) -> None:
+        job = await async_client.extractions.jobs.retrieve(
+            "job_id",
+        )
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    async def test_raw_response_retrieve(self, async_client: AsyncMixedbread) -> None:
+        response = await async_client.extractions.jobs.with_raw_response.retrieve(
+            "job_id",
+        )
+        # The raw response is expected to be closed already when the call returns.
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        job = await response.parse()
+        assert_matches_type(ExtractionJob, job, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_retrieve(self, async_client: AsyncMixedbread) -> None:
+        async with async_client.extractions.jobs.with_streaming_response.retrieve(
+            "job_id",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+            # Parse while the streamed response is still open.
+            job = await response.parse()
+            assert_matches_type(ExtractionJob, job, path=["response"])
+        # Leaving the `async with` block must close the response; cast(Any, ...) presumably avoids type narrowing.
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_retrieve(self, async_client: AsyncMixedbread) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+            await async_client.extractions.jobs.with_raw_response.retrieve(
+                "",  # an empty job_id must raise ValueError
+            )
diff --git a/tests/api_resources/extractions/test_schema.py b/tests/api_resources/extractions/test_schema.py
new file mode 100644
index 00000000..5bff1298
--- /dev/null
+++ b/tests/api_resources/extractions/test_schema.py
@@ -0,0 +1,212 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from mixedbread import Mixedbread, AsyncMixedbread
+from tests.utils import assert_matches_type
+from mixedbread.types.extractions import (
+ CreatedJsonSchema,
+ EnhancedJsonSchema,
+ ValidatedJsonSchema,
+)
+
+base_url = os.getenv("TEST_API_BASE_URL", "http://127.0.0.1:4010")  # falls back to 127.0.0.1:4010 when unset
+
+
+class TestSchema:
+    """Synchronous tests for the `create`, `enhance` and `validate` methods of `client.extractions.schema`."""
+
+    # Each test runs twice: the `client` fixture receives False (id "loose") and then True (id "strict").
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create(self, client: Mixedbread) -> None:
+        """Check that `schema.create` returns a value matching `CreatedJsonSchema`."""
+        created = client.extractions.schema.create(description="description")
+        assert_matches_type(CreatedJsonSchema, created, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: Mixedbread) -> None:
+        """Check the raw-response form of `schema.create`: closed response, request header, parsed type."""
+        raw = client.extractions.schema.with_raw_response.create(description="description")
+
+        assert raw.is_closed is True
+        assert raw.http_request.headers.get("X-Stainless-Lang") == "python"
+        created = raw.parse()
+        assert_matches_type(CreatedJsonSchema, created, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: Mixedbread) -> None:
+        """Stream `schema.create`: the response is open inside the `with` block and closed after it."""
+        with client.extractions.schema.with_streaming_response.create(
+            description="description",
+        ) as streamed:
+            assert not streamed.is_closed
+            assert streamed.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            created = streamed.parse()
+            assert_matches_type(CreatedJsonSchema, created, path=["response"])
+
+        assert cast(Any, streamed.is_closed) is True
+
+    @parametrize
+    def test_method_enhance(self, client: Mixedbread) -> None:
+        """Check that `schema.enhance` returns a value matching `EnhancedJsonSchema`."""
+        enhanced = client.extractions.schema.enhance(json_schema={})
+        assert_matches_type(EnhancedJsonSchema, enhanced, path=["response"])
+
+    @parametrize
+    def test_raw_response_enhance(self, client: Mixedbread) -> None:
+        """Check the raw-response form of `schema.enhance`: closed response, request header, parsed type."""
+        raw = client.extractions.schema.with_raw_response.enhance(json_schema={})
+
+        assert raw.is_closed is True
+        assert raw.http_request.headers.get("X-Stainless-Lang") == "python"
+        enhanced = raw.parse()
+        assert_matches_type(EnhancedJsonSchema, enhanced, path=["response"])
+
+    @parametrize
+    def test_streaming_response_enhance(self, client: Mixedbread) -> None:
+        """Stream `schema.enhance`: the response is open inside the `with` block and closed after it."""
+        with client.extractions.schema.with_streaming_response.enhance(
+            json_schema={},
+        ) as streamed:
+            assert not streamed.is_closed
+            assert streamed.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            enhanced = streamed.parse()
+            assert_matches_type(EnhancedJsonSchema, enhanced, path=["response"])
+
+        assert cast(Any, streamed.is_closed) is True
+
+    @parametrize
+    def test_method_validate(self, client: Mixedbread) -> None:
+        """Check that `schema.validate` returns a value matching `ValidatedJsonSchema`."""
+        validated = client.extractions.schema.validate(json_schema={})
+        assert_matches_type(ValidatedJsonSchema, validated, path=["response"])
+
+    @parametrize
+    def test_raw_response_validate(self, client: Mixedbread) -> None:
+        """Check the raw-response form of `schema.validate`: closed response, request header, parsed type."""
+        raw = client.extractions.schema.with_raw_response.validate(json_schema={})
+
+        assert raw.is_closed is True
+        assert raw.http_request.headers.get("X-Stainless-Lang") == "python"
+        validated = raw.parse()
+        assert_matches_type(ValidatedJsonSchema, validated, path=["response"])
+
+    @parametrize
+    def test_streaming_response_validate(self, client: Mixedbread) -> None:
+        """Stream `schema.validate`: the response is open inside the `with` block and closed after it."""
+        with client.extractions.schema.with_streaming_response.validate(
+            json_schema={},
+        ) as streamed:
+            assert not streamed.is_closed
+            assert streamed.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            validated = streamed.parse()
+            assert_matches_type(ValidatedJsonSchema, validated, path=["response"])
+
+        assert cast(Any, streamed.is_closed) is True
+
+
+class TestAsyncSchema:
+    """Asynchronous tests for the `create`, `enhance` and `validate` methods of `async_client.extractions.schema`."""
+
+    # Each test runs twice: the `async_client` fixture receives False (id "loose") and then True (id "strict").
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    async def test_method_create(self, async_client: AsyncMixedbread) -> None:
+        """Check that awaiting `schema.create` returns a value matching `CreatedJsonSchema`."""
+        created = await async_client.extractions.schema.create(description="description")
+        assert_matches_type(CreatedJsonSchema, created, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncMixedbread) -> None:
+        """Check the raw-response form of `schema.create`: closed response, request header, parsed type."""
+        raw = await async_client.extractions.schema.with_raw_response.create(description="description")
+
+        assert raw.is_closed is True
+        assert raw.http_request.headers.get("X-Stainless-Lang") == "python"
+        created = await raw.parse()
+        assert_matches_type(CreatedJsonSchema, created, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncMixedbread) -> None:
+        """Stream `schema.create`: the response is open inside the `async with` block and closed after it."""
+        async with async_client.extractions.schema.with_streaming_response.create(
+            description="description",
+        ) as streamed:
+            assert not streamed.is_closed
+            assert streamed.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            created = await streamed.parse()
+            assert_matches_type(CreatedJsonSchema, created, path=["response"])
+
+        assert cast(Any, streamed.is_closed) is True
+
+    @parametrize
+    async def test_method_enhance(self, async_client: AsyncMixedbread) -> None:
+        """Check that awaiting `schema.enhance` returns a value matching `EnhancedJsonSchema`."""
+        enhanced = await async_client.extractions.schema.enhance(json_schema={})
+        assert_matches_type(EnhancedJsonSchema, enhanced, path=["response"])
+
+    @parametrize
+    async def test_raw_response_enhance(self, async_client: AsyncMixedbread) -> None:
+        """Check the raw-response form of `schema.enhance`: closed response, request header, parsed type."""
+        raw = await async_client.extractions.schema.with_raw_response.enhance(json_schema={})
+
+        assert raw.is_closed is True
+        assert raw.http_request.headers.get("X-Stainless-Lang") == "python"
+        enhanced = await raw.parse()
+        assert_matches_type(EnhancedJsonSchema, enhanced, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_enhance(self, async_client: AsyncMixedbread) -> None:
+        """Stream `schema.enhance`: the response is open inside the `async with` block and closed after it."""
+        async with async_client.extractions.schema.with_streaming_response.enhance(
+            json_schema={},
+        ) as streamed:
+            assert not streamed.is_closed
+            assert streamed.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            enhanced = await streamed.parse()
+            assert_matches_type(EnhancedJsonSchema, enhanced, path=["response"])
+
+        assert cast(Any, streamed.is_closed) is True
+
+    @parametrize
+    async def test_method_validate(self, async_client: AsyncMixedbread) -> None:
+        """Check that awaiting `schema.validate` returns a value matching `ValidatedJsonSchema`."""
+        validated = await async_client.extractions.schema.validate(json_schema={})
+        assert_matches_type(ValidatedJsonSchema, validated, path=["response"])
+
+    @parametrize
+    async def test_raw_response_validate(self, async_client: AsyncMixedbread) -> None:
+        """Check the raw-response form of `schema.validate`: closed response, request header, parsed type."""
+        raw = await async_client.extractions.schema.with_raw_response.validate(json_schema={})
+
+        assert raw.is_closed is True
+        assert raw.http_request.headers.get("X-Stainless-Lang") == "python"
+        validated = await raw.parse()
+        assert_matches_type(ValidatedJsonSchema, validated, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_validate(self, async_client: AsyncMixedbread) -> None:
+        """Stream `schema.validate`: the response is open inside the `async with` block and closed after it."""
+        async with async_client.extractions.schema.with_streaming_response.validate(
+            json_schema={},
+        ) as streamed:
+            assert not streamed.is_closed
+            assert streamed.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            validated = await streamed.parse()
+            assert_matches_type(ValidatedJsonSchema, validated, path=["response"])
+
+        assert cast(Any, streamed.is_closed) is True