diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 55d20255..ff261bad 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.44.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8a8a4f7..a7180a90 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,18 +1,18 @@ name: CI on: push: - branches: - - main - pull_request: - branches: - - main - - next + branches-ignore: + - 'generated' + - 'codegen/**' + - 'integrated/**' + - 'stl-preview-head/**' + - 'stl-preview-base/**' jobs: lint: + timeout-minutes: 10 name: lint - runs-on: ubuntu-latest - + runs-on: ${{ github.repository == 'stainless-sdks/togetherai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} steps: - uses: actions/checkout@v4 @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: '0.35.0' + RYE_VERSION: '0.44.0' RYE_INSTALL_OPTION: '--yes' - name: Install dependencies @@ -31,9 +31,9 @@ jobs: run: ./scripts/lint test: + timeout-minutes: 10 name: test - runs-on: ubuntu-latest - + runs-on: ${{ github.repository == 'stainless-sdks/togetherai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} steps: - uses: actions/checkout@v4 @@ -42,7 +42,7 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: '0.35.0' + RYE_VERSION: '0.44.0' RYE_INSTALL_OPTION: '--yes' - name: Bootstrap diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index fb499434..41ab0074 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: '0.35.0' + RYE_VERSION: '0.44.0' RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI diff --git a/.release-please-manifest.json b/.release-please-manifest.json index aaf968a1..b56c3d0b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0-alpha.3" + ".": "0.1.0-alpha.4" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index a38bcf77..03c953a6 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,4 @@ -configured_endpoints: 17 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-a11987df1895448b6ccbbc6d931e7db9a0dc3e6de7c6efb237ac9548342e616b.yml +configured_endpoints: 28 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-8f50cb3e342f2fd67f1f2cfda195b3d78c0740344f55f37cf1c99c66a0f7c2c5.yml +openapi_spec_hash: b9907745f73f337395ffd5cef1e8a2d5 +config_hash: a60b100624e80dc8d9144e7bc306f5ce diff --git a/CHANGELOG.md b/CHANGELOG.md index d57805cf..ffdc48d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,70 @@ # Changelog +## 0.1.0-alpha.4 (2025-05-13) + +Full Changelog: [v0.1.0-alpha.3...v0.1.0-alpha.4](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.3...v0.1.0-alpha.4) + +### Features + +* **api:** add tci resources ([023b3a0](https://github.com/togethercomputer/together-py/commit/023b3a00991729a0a06845ee7f64f760cf6f4325)) +* **api:** adds unspecified endpoints ([678f58a](https://github.com/togethercomputer/together-py/commit/678f58af8b2be9e65b667cb0b104a9be4b6667f4)) +* **api:** api update ([6d9609e](https://github.com/togethercomputer/together-py/commit/6d9609e279d228ba1aad926914d089904b858c01)) +* **api:** api update ([bb40eb9](https://github.com/togethercomputer/together-py/commit/bb40eb96cbf911f0f9772c98e261988ab1432383)) +* **api:** api update ([271887f](https://github.com/togethercomputer/together-py/commit/271887fe30d8f4b8f0405d16366e1f82868a1d0d)) +* **api:** api update ([2a7de06](https://github.com/togethercomputer/together-py/commit/2a7de06a3a1b5425a1dd553c32390df21b252e21)) +* **api:** api update ([#117](https://github.com/togethercomputer/together-py/issues/117)) ([dd8e841](https://github.com/togethercomputer/together-py/commit/dd8e841d1eaf40a9f143f63f7f4ced0f701b0fbd)) +* **api:** api update ([#120](https://github.com/togethercomputer/together-py/issues/120)) ([adf0e5b](https://github.com/togethercomputer/together-py/commit/adf0e5ba1cd266278cf4503b04cfcd847a97b0e4)) +* **api:** api update ([#121](https://github.com/togethercomputer/together-py/issues/121)) ([0ab0bc9](https://github.com/togethercomputer/together-py/commit/0ab0bc97ca4db4d2d64f3c2f9eeada9ffa37fc97)) +* **api:** api update ([#130](https://github.com/togethercomputer/together-py/issues/130)) ([4f1a7ea](https://github.com/togethercomputer/together-py/commit/4f1a7ea708c55466f4fa3f1698b505ffbfe2aea6)) +* **api:** api update ([#132](https://github.com/togethercomputer/together-py/issues/132)) ([7c8a194](https://github.com/togethercomputer/together-py/commit/7c8a194c4e1f484f8455adce6f56c840411f4946)) +* **api:** api update ([#135](https://github.com/togethercomputer/together-py/issues/135)) ([22a93e9](https://github.com/togethercomputer/together-py/commit/22a93e9c5c7a33c502f5a4c380c576c2a752d6a5)) + + +### Bug Fixes + +* **ci:** ensure pip is always available ([#127](https://github.com/togethercomputer/together-py/issues/127)) ([4da2bc0](https://github.com/togethercomputer/together-py/commit/4da2bc0bb7cc4516cf0d93032544fbb71025c118)) +* **ci:** remove publishing patch ([#128](https://github.com/togethercomputer/together-py/issues/128)) ([6bd4d6f](https://github.com/togethercomputer/together-py/commit/6bd4d6f8d8f8842f56cdbb56df0a4d5e5227dde4)) +* **client:** correct type to enum ([#129](https://github.com/togethercomputer/together-py/issues/129)) ([8a5fa0e](https://github.com/togethercomputer/together-py/commit/8a5fa0e2858e851756f022943ada948374bb017c)) +* **package:** support direct resource imports ([f59e7c3](https://github.com/togethercomputer/together-py/commit/f59e7c3b3bcc7c076bd8c71b2ab42f8a117e5519)) +* **perf:** optimize some hot paths ([f79734d](https://github.com/togethercomputer/together-py/commit/f79734d809a4a7c18eb8903190e6b4d90d299e45)) +* **perf:** skip traversing types for NotGiven values ([1103dd0](https://github.com/togethercomputer/together-py/commit/1103dd03e7f021deadd0b000b3bff9c5494442b6)) +* **pydantic v1:** more robust ModelField.annotation check ([d380238](https://github.com/togethercomputer/together-py/commit/d3802383e80ad8d3606a1e753c72a20864531332)) +* skip invalid fine-tune test ([#133](https://github.com/togethercomputer/together-py/issues/133)) ([2f41046](https://github.com/togethercomputer/together-py/commit/2f4104625264947305cee0bd26fc38ff290f16ea)) +* **tests:** correctly skip create fine tune tests ([#138](https://github.com/togethercomputer/together-py/issues/138)) ([47c9cae](https://github.com/togethercomputer/together-py/commit/47c9cae7da9caee8de3ba7480b784fc5d168e1b0)) +* **types:** handle more discriminated union shapes ([#126](https://github.com/togethercomputer/together-py/issues/126)) ([2483c76](https://github.com/togethercomputer/together-py/commit/2483c76ee0cf06ee7a1819446cfa4fa349958da4)) + + +### Chores + +* broadly detect json family of content-type headers ([6e2421e](https://github.com/togethercomputer/together-py/commit/6e2421e126e74b4bcc7bc2aaef07a078bdd1e0ea)) +* **ci:** add timeout thresholds for CI jobs ([2425c53](https://github.com/togethercomputer/together-py/commit/2425c53723d34959380d44131d607ded5a665004)) +* **ci:** only use depot for staging repos ([2dfe569](https://github.com/togethercomputer/together-py/commit/2dfe569cf72f74a97fbe1e282c9d079c371d32aa)) +* **ci:** run on more branches and use depot runners ([3c61f56](https://github.com/togethercomputer/together-py/commit/3c61f565633c395dba16fda924c241910145c13c)) +* **client:** minor internal fixes ([f6f5174](https://github.com/togethercomputer/together-py/commit/f6f5174c6ec0b9a3a4decfc25737efbbb52bffe5)) +* fix typos ([#131](https://github.com/togethercomputer/together-py/issues/131)) ([dedf3ad](https://github.com/togethercomputer/together-py/commit/dedf3adb709255ba9303e29354b013db8a8520b9)) +* **internal:** avoid errors for isinstance checks on proxies ([8b81509](https://github.com/togethercomputer/together-py/commit/8b81509faac153ee4a33b3460c17759e2465dfcd)) +* **internal:** base client updates ([890efc3](https://github.com/togethercomputer/together-py/commit/890efc36f00553025237601bad51f3f0a906376b)) +* **internal:** bump pyright version ([01e104a](https://github.com/togethercomputer/together-py/commit/01e104a2bba92c77ef610cf48720d8a2785ff39b)) +* **internal:** bump rye to 0.44.0 ([#124](https://github.com/togethercomputer/together-py/issues/124)) ([e8c3dc3](https://github.com/togethercomputer/together-py/commit/e8c3dc3be0e56d7c4e7a48d8f824a88878e0c981)) +* **internal:** codegen related update ([#125](https://github.com/togethercomputer/together-py/issues/125)) ([5e83e04](https://github.com/togethercomputer/together-py/commit/5e83e043b3f62c38fa13c72d54278e845c2df46a)) +* **internal:** expand CI branch coverage ([#139](https://github.com/togethercomputer/together-py/issues/139)) ([2db8ca2](https://github.com/togethercomputer/together-py/commit/2db8ca2b6d063b136e9cb50c3991a11f6f47e4fb)) +* **internal:** fix list file params ([8a8dcd3](https://github.com/togethercomputer/together-py/commit/8a8dcd384e480c52358460ba662a48311a415cfb)) +* **internal:** import reformatting ([49f361b](https://github.com/togethercomputer/together-py/commit/49f361bf9d548ca45a01e31972b5db797752e481)) +* **internal:** minor formatting changes ([33e3a75](https://github.com/togethercomputer/together-py/commit/33e3a751bd9f3382e5e462bbcf92a212e14d26ff)) +* **internal:** reduce CI branch coverage ([6f6ac97](https://github.com/togethercomputer/together-py/commit/6f6ac973e36bdeb28883ff6281228c67f76c55a1)) +* **internal:** refactor retries to not use recursion ([ffb0eb4](https://github.com/togethercomputer/together-py/commit/ffb0eb46712544a86f01eaa842ac13f085e37fee)) +* **internal:** remove extra empty newlines ([#122](https://github.com/togethercomputer/together-py/issues/122)) ([b0cbbaa](https://github.com/togethercomputer/together-py/commit/b0cbbaa10e003e84cf2c8c23ef05baa6bc9d4e82)) +* **internal:** remove trailing character ([#134](https://github.com/togethercomputer/together-py/issues/134)) ([f09c6cb](https://github.com/togethercomputer/together-py/commit/f09c6cb1620997e72b99bc918d77ae9a2be9e8b3)) +* **internal:** slight transform perf improvement ([#136](https://github.com/togethercomputer/together-py/issues/136)) ([d31383c](https://github.com/togethercomputer/together-py/commit/d31383c0f8fb1749381fad871aa60bd0eaad3e03)) +* **internal:** update models test ([b64d4cc](https://github.com/togethercomputer/together-py/commit/b64d4cc9a1424fa7f46088e51306b877afba3fae)) +* **internal:** update pyright settings ([05720d5](https://github.com/togethercomputer/together-py/commit/05720d5b0b7387fbe3b04975dfa6b764898a7a02)) +* **tests:** improve enum examples ([#137](https://github.com/togethercomputer/together-py/issues/137)) ([4c3e75d](https://github.com/togethercomputer/together-py/commit/4c3e75d5aa75421d4aca257c0df89d24e2db264e)) + + +### Documentation + +* revise readme docs about nested params ([#118](https://github.com/togethercomputer/together-py/issues/118)) ([0eefffd](https://github.com/togethercomputer/together-py/commit/0eefffd623bc692f2e03fd299b9b05c3bb88bf53)) + ## 0.1.0-alpha.3 (2025-03-05) Full Changelog: [v0.1.0-alpha.2...v0.1.0-alpha.3](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.2...v0.1.0-alpha.3) diff --git a/README.md b/README.md index 1c6e44f5..fa7c96af 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,31 @@ Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typ Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`. +## Nested params + +Nested parameters are dictionaries, typed using `TypedDict`, for example: + +```python +from together import Together + +client = Together() + +chat_completion = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "system", + } + ], + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + response_format={ + "schema": {"foo": "bar"}, + "type": "json", + }, +) +print(chat_completion.response_format) +``` + ## Handling errors When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `together.APIConnectionError` is raised. diff --git a/api.md b/api.md index 079efc3b..d1cb335c 100644 --- a/api.md +++ b/api.md @@ -86,6 +86,30 @@ Methods: - client.fine_tune.download(\*\*params) -> FineTuneDownloadResponse - client.fine_tune.list_events(id) -> FineTuneEvent +# CodeInterpreter + +Types: + +```python +from together.types import ExecuteResponse +``` + +Methods: + +- client.code_interpreter.execute(\*\*params) -> ExecuteResponse + +## Sessions + +Types: + +```python +from together.types.code_interpreter import SessionListResponse +``` + +Methods: + +- client.code_interpreter.sessions.list() -> SessionListResponse + # Images Types: @@ -115,9 +139,56 @@ Methods: Types: ```python -from together.types import ModelListResponse +from together.types import ModelListResponse, ModelUploadResponse ``` Methods: - client.models.list() -> ModelListResponse +- client.models.upload(\*\*params) -> ModelUploadResponse + +# Jobs + +Types: + +```python +from together.types import JobRetrieveResponse, JobListResponse +``` + +Methods: + +- client.jobs.retrieve(job_id) -> JobRetrieveResponse +- client.jobs.list() -> JobListResponse + +# Endpoints + +Types: + +```python +from together.types import ( + EndpointCreateResponse, + EndpointRetrieveResponse, + EndpointUpdateResponse, + EndpointListResponse, +) +``` + +Methods: + +- client.endpoints.create(\*\*params) -> EndpointCreateResponse +- client.endpoints.retrieve(endpoint_id) -> EndpointRetrieveResponse +- client.endpoints.update(endpoint_id, \*\*params) -> EndpointUpdateResponse +- client.endpoints.list(\*\*params) -> EndpointListResponse +- client.endpoints.delete(endpoint_id) -> None + +# Hardware + +Types: + +```python +from together.types import HardwareListResponse +``` + +Methods: + +- client.hardware.list(\*\*params) -> HardwareListResponse diff --git a/bin/publish-pypi b/bin/publish-pypi index 05bfccbb..826054e9 100644 --- a/bin/publish-pypi +++ b/bin/publish-pypi @@ -3,7 +3,4 @@ set -eux mkdir -p dist rye build --clean -# Patching importlib-metadata version until upstream library version is updated -# https://github.com/pypa/twine/issues/977#issuecomment-2189800841 -"$HOME/.rye/self/bin/python3" -m pip install 'importlib-metadata==7.2.1' rye publish --yes --token=$PYPI_TOKEN diff --git a/pyproject.toml b/pyproject.toml index 6abb7a59..943b01f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "together" -version = "0.1.0-alpha.3" +version = "0.1.0-alpha.4" description = "The official Python library for the together API" dynamic = ["readme"] license = "Apache-2.0" @@ -50,7 +50,7 @@ together = "together.lib.cli.cli:main" managed = true # version pins are in requirements-dev.lock dev-dependencies = [ - "pyright>=1.1.359", + "pyright==1.1.399", "mypy", "respx", "pytest", @@ -94,7 +94,7 @@ typecheck = { chain = [ "typecheck:mypy" = "mypy ." [build-system] -requires = ["hatchling", "hatch-fancy-pypi-readme"] +requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"] build-backend = "hatchling.build" [tool.hatch.build] @@ -155,11 +155,11 @@ exclude = [ ] reportImplicitOverride = true +reportOverlappingOverload = false reportImportCycles = false reportPrivateUsage = false - [tool.ruff] line-length = 120 output-format = "grouped" diff --git a/requirements-dev.lock b/requirements-dev.lock index 76cd6127..addb0f1e 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -7,6 +7,7 @@ # all-features: true # with-sources: false # generate-hashes: false +# universal: false -e file:. annotated-types==0.6.0 @@ -80,7 +81,7 @@ pydantic-core==2.27.1 # via pydantic pygments==2.18.0 # via rich -pyright==1.1.392.post0 +pyright==1.1.399 pytest==8.3.3 # via pytest-asyncio pytest-asyncio==0.24.0 diff --git a/requirements.lock b/requirements.lock index 04389e74..1d8f9198 100644 --- a/requirements.lock +++ b/requirements.lock @@ -7,6 +7,7 @@ # all-features: true # with-sources: false # generate-hashes: false +# universal: false -e file:. annotated-types==0.6.0 diff --git a/scripts/test b/scripts/test index 4fa5698b..2b878456 100755 --- a/scripts/test +++ b/scripts/test @@ -52,6 +52,8 @@ else echo fi +export DEFER_PYDANTIC_BUILD=false + echo "==> Running tests" rye run pytest "$@" diff --git a/src/together/__init__.py b/src/together/__init__.py index bbaaebad..8adebcd9 100644 --- a/src/together/__init__.py +++ b/src/together/__init__.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import typing as _t + from . import types from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path @@ -78,6 +80,9 @@ "DefaultAsyncHttpxClient", ] +if not _t.TYPE_CHECKING: + from ._utils._resources_proxy import resources as resources + _setup_logging() # Update the __module__ attribute for exported symbols so that diff --git a/src/together/_base_client.py b/src/together/_base_client.py index 2e2c8939..ee2f5115 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -98,7 +98,11 @@ _AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any]) if TYPE_CHECKING: - from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + from httpx._config import ( + DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage] + ) + + HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG else: try: from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT @@ -115,6 +119,7 @@ class PageInfo: url: URL | NotGiven params: Query | NotGiven + json: Body | NotGiven @overload def __init__( @@ -130,19 +135,30 @@ def __init__( params: Query, ) -> None: ... + @overload + def __init__( + self, + *, + json: Body, + ) -> None: ... + def __init__( self, *, url: URL | NotGiven = NOT_GIVEN, + json: Body | NotGiven = NOT_GIVEN, params: Query | NotGiven = NOT_GIVEN, ) -> None: self.url = url + self.json = json self.params = params @override def __repr__(self) -> str: if self.url: return f"{self.__class__.__name__}(url={self.url})" + if self.json: + return f"{self.__class__.__name__}(json={self.json})" return f"{self.__class__.__name__}(params={self.params})" @@ -191,6 +207,19 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions: options.url = str(url) return options + if not isinstance(info.json, NotGiven): + if not is_mapping(info.json): + raise TypeError("Pagination is only supported with mappings") + + if not options.json_data: + options.json_data = {**info.json} + else: + if not is_mapping(options.json_data): + raise TypeError("Pagination is only supported with mappings") + + options.json_data = {**options.json_data, **info.json} + return options + raise ValueError("Unexpected PageInfo state") @@ -408,8 +437,8 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0 headers = httpx.Headers(headers_dict) idempotency_header = self._idempotency_header - if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: - headers[idempotency_header] = options.idempotency_key or self._idempotency_key() + if idempotency_header and options.idempotency_key and idempotency_header not in headers: + headers[idempotency_header] = options.idempotency_key # Don't set these headers if they were already set or removed by the caller. We check # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. @@ -873,7 +902,6 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[True], stream_cls: Type[_StreamT], @@ -884,7 +912,6 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: Literal[False] = False, ) -> ResponseT: ... @@ -894,7 +921,6 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: Type[_StreamT] | None = None, @@ -904,121 +930,109 @@ def request( self, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: Optional[int] = None, *, stream: bool = False, stream_cls: type[_StreamT] | None = None, ) -> ResponseT | _StreamT: - if remaining_retries is not None: - retries_taken = options.get_max_retries(self.max_retries) - remaining_retries - else: - retries_taken = 0 - - return self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - retries_taken=retries_taken, - ) + cast_to = self._maybe_override_cast_to(cast_to, options) - def _request( - self, - *, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - retries_taken: int, - stream: bool, - stream_cls: type[_StreamT] | None, - ) -> ResponseT | _StreamT: # create a copy of the options we were given so that if the # options are mutated later & we then retry, the retries are # given the original options input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() - cast_to = self._maybe_override_cast_to(cast_to, options) - options = self._prepare_options(options) - - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken - request = self._build_request(options, retries_taken=retries_taken) - self._prepare_request(request) - - kwargs: HttpxSendArgs = {} - if self.custom_auth is not None: - kwargs["auth"] = self.custom_auth + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) - log.debug("Sending HTTP Request: %s %s", request.method, request.url) + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = self._prepare_options(options) - try: - response = self._client.send( - request, - stream=stream or self._should_stream_response_body(request=request), - **kwargs, - ) - except httpx.TimeoutException as err: - log.debug("Encountered httpx.TimeoutException", exc_info=True) + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + self._prepare_request(request) - if remaining_retries > 0: - return self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth - log.debug("Raising timeout error") - raise APITimeoutError(request=request) from err - except Exception as err: - log.debug("Encountered Exception", exc_info=True) + log.debug("Sending HTTP Request: %s %s", request.method, request.url) - if remaining_retries > 0: - return self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, + response = None + try: + response = self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, + ) - log.debug("Raising connection error") - raise APIConnectionError(request=request) from err - - log.debug( - 'HTTP Response: %s %s "%i %s" %s', - request.method, - request.url, - response.status_code, - response.reason_phrase, - response.headers, - ) + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 0 and self._should_retry(err.response): + err.response.close() + self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue - try: - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - - if remaining_retries > 0 and self._should_retry(err.response): - err.response.close() - return self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - response_headers=err.response.headers, - stream=stream, - stream_cls=stream_cls, - ) + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. - if not err.response.is_closed: - err.response.read() + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None - log.debug("Re-raising status error") - raise self._make_status_error_from_response(err.response) from None + break + assert response is not None, "could not resolve response (should never happen)" return self._process_response( cast_to=cast_to, options=options, @@ -1028,37 +1042,20 @@ def _request( retries_taken=retries_taken, ) - def _retry_request( - self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - *, - retries_taken: int, - response_headers: httpx.Headers | None, - stream: bool, - stream_cls: type[_StreamT] | None, - ) -> ResponseT | _StreamT: - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken if remaining_retries == 1: log.debug("1 retry left") else: log.debug("%i retries left", remaining_retries) - timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) log.info("Retrying request to %s in %f seconds", options.url, timeout) - # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a - # different thread if necessary. time.sleep(timeout) - return self._request( - options=options, - cast_to=cast_to, - retries_taken=retries_taken + 1, - stream=stream, - stream_cls=stream_cls, - ) - def _process_response( self, *, @@ -1402,7 +1399,6 @@ async def request( options: FinalRequestOptions, *, stream: Literal[False] = False, - remaining_retries: Optional[int] = None, ) -> ResponseT: ... @overload @@ -1413,7 +1409,6 @@ async def request( *, stream: Literal[True], stream_cls: type[_AsyncStreamT], - remaining_retries: Optional[int] = None, ) -> _AsyncStreamT: ... @overload @@ -1424,7 +1419,6 @@ async def request( *, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, ) -> ResponseT | _AsyncStreamT: ... async def request( @@ -1434,116 +1428,111 @@ async def request( *, stream: bool = False, stream_cls: type[_AsyncStreamT] | None = None, - remaining_retries: Optional[int] = None, - ) -> ResponseT | _AsyncStreamT: - if remaining_retries is not None: - retries_taken = options.get_max_retries(self.max_retries) - remaining_retries - else: - retries_taken = 0 - - return await self._request( - cast_to=cast_to, - options=options, - stream=stream, - stream_cls=stream_cls, - retries_taken=retries_taken, - ) - - async def _request( - self, - cast_to: Type[ResponseT], - options: FinalRequestOptions, - *, - stream: bool, - stream_cls: type[_AsyncStreamT] | None, - retries_taken: int, ) -> ResponseT | _AsyncStreamT: if self._platform is None: # `get_platform` can make blocking IO calls so we # execute it earlier while we are in an async context self._platform = await asyncify(get_platform)() + cast_to = self._maybe_override_cast_to(cast_to, options) + # create a copy of the options we were given so that if the # options are mutated later & we then retry, the retries are # given the original options input_options = model_copy(options) + if input_options.idempotency_key is None and input_options.method.lower() != "get": + # ensure the idempotency key is reused between requests + input_options.idempotency_key = self._idempotency_key() - cast_to = self._maybe_override_cast_to(cast_to, options) - options = await self._prepare_options(options) + response: httpx.Response | None = None + max_retries = input_options.get_max_retries(self.max_retries) - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken - request = self._build_request(options, retries_taken=retries_taken) - await self._prepare_request(request) + retries_taken = 0 + for retries_taken in range(max_retries + 1): + options = model_copy(input_options) + options = await self._prepare_options(options) - kwargs: HttpxSendArgs = {} - if self.custom_auth is not None: - kwargs["auth"] = self.custom_auth + remaining_retries = max_retries - retries_taken + request = self._build_request(options, retries_taken=retries_taken) + await self._prepare_request(request) - try: - response = await self._client.send( - request, - stream=stream or self._should_stream_response_body(request=request), - **kwargs, - ) - except httpx.TimeoutException as err: - log.debug("Encountered httpx.TimeoutException", exc_info=True) - - if remaining_retries > 0: - return await self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, - ) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth - log.debug("Raising timeout error") - raise APITimeoutError(request=request) from err - except Exception as err: - log.debug("Encountered Exception", exc_info=True) + log.debug("Sending HTTP Request: %s %s", request.method, request.url) - if remaining_retries > 0: - return await self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - stream=stream, - stream_cls=stream_cls, - response_headers=None, + response = None + try: + response = await self._client.send( + request, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, ) + except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising timeout error") + raise APITimeoutError(request=request) from err + except Exception as err: + log.debug("Encountered Exception", exc_info=True) + + if remaining_retries > 0: + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=None, + ) + continue + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Response: %s %s "%i %s" %s', + request.method, + request.url, + response.status_code, + response.reason_phrase, + response.headers, + ) - log.debug("Raising connection error") - raise APIConnectionError(request=request) from err + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if remaining_retries > 0 and self._should_retry(err.response): + await err.response.aclose() + await self._sleep_for_retry( + retries_taken=retries_taken, + max_retries=max_retries, + options=input_options, + response=response, + ) + continue - log.debug( - 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase - ) + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + await err.response.aread() - try: - response.raise_for_status() - except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code - log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - - if remaining_retries > 0 and self._should_retry(err.response): - await err.response.aclose() - return await self._retry_request( - input_options, - cast_to, - retries_taken=retries_taken, - response_headers=err.response.headers, - stream=stream, - stream_cls=stream_cls, - ) + log.debug("Re-raising status error") + raise self._make_status_error_from_response(err.response) from None - # If the response is streamed then we need to explicitly read the response - # to completion before attempting to access the response text. - if not err.response.is_closed: - await err.response.aread() - - log.debug("Re-raising status error") - raise self._make_status_error_from_response(err.response) from None + break + assert response is not None, "could not resolve response (should never happen)" return await self._process_response( cast_to=cast_to, options=options, @@ -1553,35 +1542,20 @@ async def _request( retries_taken=retries_taken, ) - async def _retry_request( - self, - options: FinalRequestOptions, - cast_to: Type[ResponseT], - *, - retries_taken: int, - response_headers: httpx.Headers | None, - stream: bool, - stream_cls: type[_AsyncStreamT] | None, - ) -> ResponseT | _AsyncStreamT: - remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + async def _sleep_for_retry( + self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None + ) -> None: + remaining_retries = max_retries - retries_taken if remaining_retries == 1: log.debug("1 retry left") else: log.debug("%i retries left", remaining_retries) - timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None) log.info("Retrying request to %s in %f seconds", options.url, timeout) await anyio.sleep(timeout) - return await self._request( - options=options, - cast_to=cast_to, - retries_taken=retries_taken + 1, - stream=stream, - stream_cls=stream_cls, - ) - async def _process_response( self, *, diff --git a/src/together/_client.py b/src/together/_client.py index a37edcc1..f2e64aed 100644 --- a/src/together/_client.py +++ b/src/together/_client.py @@ -36,7 +36,7 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from .resources import audio, files, images, models, fine_tune, embeddings, completions +from .resources import jobs, audio, files, images, models, hardware, endpoints, fine_tune, embeddings, completions from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import TogetherError, APIStatusError from ._base_client import ( @@ -47,6 +47,7 @@ ) from .resources.chat import chat from .types.rerank_response import RerankResponse +from .resources.code_interpreter import code_interpreter __all__ = [ "Timeout", @@ -66,9 +67,13 @@ class Together(SyncAPIClient): embeddings: embeddings.EmbeddingsResource files: files.FilesResource fine_tune: fine_tune.FineTuneResource + code_interpreter: code_interpreter.CodeInterpreterResource images: images.ImagesResource audio: audio.AudioResource models: models.ModelsResource + jobs: jobs.JobsResource + endpoints: endpoints.EndpointsResource + hardware: hardware.HardwareResource with_raw_response: TogetherWithRawResponse with_streaming_response: TogetherWithStreamedResponse @@ -133,9 +138,13 @@ def __init__( self.embeddings = embeddings.EmbeddingsResource(self) self.files = files.FilesResource(self) self.fine_tune = fine_tune.FineTuneResource(self) + self.code_interpreter = code_interpreter.CodeInterpreterResource(self) self.images = images.ImagesResource(self) self.audio = audio.AudioResource(self) self.models = models.ModelsResource(self) + self.jobs = jobs.JobsResource(self) + self.endpoints = endpoints.EndpointsResource(self) + self.hardware = hardware.HardwareResource(self) self.with_raw_response = TogetherWithRawResponse(self) self.with_streaming_response = TogetherWithStreamedResponse(self) @@ -312,9 +321,13 @@ class AsyncTogether(AsyncAPIClient): embeddings: embeddings.AsyncEmbeddingsResource files: files.AsyncFilesResource fine_tune: fine_tune.AsyncFineTuneResource + code_interpreter: code_interpreter.AsyncCodeInterpreterResource images: images.AsyncImagesResource audio: audio.AsyncAudioResource models: models.AsyncModelsResource + jobs: jobs.AsyncJobsResource + endpoints: endpoints.AsyncEndpointsResource + hardware: hardware.AsyncHardwareResource with_raw_response: AsyncTogetherWithRawResponse with_streaming_response: AsyncTogetherWithStreamedResponse @@ -379,9 +392,13 @@ def __init__( self.embeddings = embeddings.AsyncEmbeddingsResource(self) self.files = files.AsyncFilesResource(self) self.fine_tune = fine_tune.AsyncFineTuneResource(self) + self.code_interpreter = code_interpreter.AsyncCodeInterpreterResource(self) self.images = images.AsyncImagesResource(self) self.audio = audio.AsyncAudioResource(self) self.models = models.AsyncModelsResource(self) + self.jobs = jobs.AsyncJobsResource(self) + self.endpoints = endpoints.AsyncEndpointsResource(self) + self.hardware = hardware.AsyncHardwareResource(self) self.with_raw_response = AsyncTogetherWithRawResponse(self) self.with_streaming_response = AsyncTogetherWithStreamedResponse(self) @@ -559,9 +576,13 @@ def __init__(self, client: Together) -> None: self.embeddings = embeddings.EmbeddingsResourceWithRawResponse(client.embeddings) self.files = files.FilesResourceWithRawResponse(client.files) self.fine_tune = fine_tune.FineTuneResourceWithRawResponse(client.fine_tune) + self.code_interpreter = code_interpreter.CodeInterpreterResourceWithRawResponse(client.code_interpreter) self.images = images.ImagesResourceWithRawResponse(client.images) self.audio = audio.AudioResourceWithRawResponse(client.audio) self.models = models.ModelsResourceWithRawResponse(client.models) + self.jobs = jobs.JobsResourceWithRawResponse(client.jobs) + self.endpoints = endpoints.EndpointsResourceWithRawResponse(client.endpoints) + self.hardware = hardware.HardwareResourceWithRawResponse(client.hardware) self.rerank = to_raw_response_wrapper( client.rerank, @@ -575,9 +596,13 @@ def __init__(self, client: AsyncTogether) -> None: self.embeddings = embeddings.AsyncEmbeddingsResourceWithRawResponse(client.embeddings) self.files = files.AsyncFilesResourceWithRawResponse(client.files) self.fine_tune = fine_tune.AsyncFineTuneResourceWithRawResponse(client.fine_tune) + self.code_interpreter = code_interpreter.AsyncCodeInterpreterResourceWithRawResponse(client.code_interpreter) self.images = images.AsyncImagesResourceWithRawResponse(client.images) self.audio = audio.AsyncAudioResourceWithRawResponse(client.audio) self.models = models.AsyncModelsResourceWithRawResponse(client.models) + self.jobs = jobs.AsyncJobsResourceWithRawResponse(client.jobs) + self.endpoints = endpoints.AsyncEndpointsResourceWithRawResponse(client.endpoints) + self.hardware = hardware.AsyncHardwareResourceWithRawResponse(client.hardware) self.rerank = async_to_raw_response_wrapper( client.rerank, @@ -591,9 +616,13 @@ def __init__(self, client: Together) -> None: self.embeddings = embeddings.EmbeddingsResourceWithStreamingResponse(client.embeddings) self.files = files.FilesResourceWithStreamingResponse(client.files) self.fine_tune = fine_tune.FineTuneResourceWithStreamingResponse(client.fine_tune) + self.code_interpreter = code_interpreter.CodeInterpreterResourceWithStreamingResponse(client.code_interpreter) self.images = images.ImagesResourceWithStreamingResponse(client.images) self.audio = audio.AudioResourceWithStreamingResponse(client.audio) self.models = models.ModelsResourceWithStreamingResponse(client.models) + self.jobs = jobs.JobsResourceWithStreamingResponse(client.jobs) + self.endpoints = endpoints.EndpointsResourceWithStreamingResponse(client.endpoints) + self.hardware = hardware.HardwareResourceWithStreamingResponse(client.hardware) self.rerank = to_streamed_response_wrapper( client.rerank, @@ -607,9 +636,15 @@ def __init__(self, client: AsyncTogether) -> None: self.embeddings = embeddings.AsyncEmbeddingsResourceWithStreamingResponse(client.embeddings) self.files = files.AsyncFilesResourceWithStreamingResponse(client.files) self.fine_tune = fine_tune.AsyncFineTuneResourceWithStreamingResponse(client.fine_tune) + self.code_interpreter = code_interpreter.AsyncCodeInterpreterResourceWithStreamingResponse( + client.code_interpreter + ) self.images = images.AsyncImagesResourceWithStreamingResponse(client.images) self.audio = audio.AsyncAudioResourceWithStreamingResponse(client.audio) self.models = models.AsyncModelsResourceWithStreamingResponse(client.models) + self.jobs = jobs.AsyncJobsResourceWithStreamingResponse(client.jobs) + self.endpoints = endpoints.AsyncEndpointsResourceWithStreamingResponse(client.endpoints) + self.hardware = hardware.AsyncHardwareResourceWithStreamingResponse(client.hardware) self.rerank = async_to_streamed_response_wrapper( client.rerank, diff --git a/src/together/_models.py b/src/together/_models.py index c4401ff8..798956f1 100644 --- a/src/together/_models.py +++ b/src/together/_models.py @@ -19,7 +19,6 @@ ) import pydantic -import pydantic.generics from pydantic.fields import FieldInfo from ._types import ( @@ -65,7 +64,7 @@ from ._constants import RAW_RESPONSE_HEADER if TYPE_CHECKING: - from pydantic_core.core_schema import ModelField, LiteralSchema, ModelFieldsSchema + from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema __all__ = ["BaseModel", "GenericModel"] @@ -627,8 +626,8 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, # Note: if one variant defines an alias then they all should discriminator_alias = field_info.alias - if field_info.annotation and is_literal_type(field_info.annotation): - for entry in get_args(field_info.annotation): + if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation): + for entry in get_args(annotation): if isinstance(entry, str): mapping[entry] = variant @@ -646,15 +645,18 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None: schema = model.__pydantic_core_schema__ + if schema["type"] == "definitions": + schema = schema["schema"] + if schema["type"] != "model": return None + schema = cast("ModelSchema", schema) fields_schema = schema["schema"] if fields_schema["type"] != "model-fields": return None fields_schema = cast("ModelFieldsSchema", fields_schema) - field = fields_schema["fields"].get(field_name) if not field: return None @@ -678,7 +680,7 @@ def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None: setattr(typ, "__pydantic_config__", config) # noqa: B010 -# our use of subclasssing here causes weirdness for type checkers, +# our use of subclassing here causes weirdness for type checkers, # so we just pretend that we don't subclass if TYPE_CHECKING: GenericModel = BaseModel diff --git a/src/together/_response.py b/src/together/_response.py index fb38392e..7188c3e4 100644 --- a/src/together/_response.py +++ b/src/together/_response.py @@ -233,7 +233,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: # split is required to handle cases where additional information is included # in the response, e.g. application/json; charset=utf-8 content_type, *_ = response.headers.get("content-type", "*").split(";") - if content_type != "application/json": + if not content_type.endswith("json"): if is_basemodel(cast_to): try: data = response.json() diff --git a/src/together/_utils/_proxy.py b/src/together/_utils/_proxy.py index ffd883e9..0f239a33 100644 --- a/src/together/_utils/_proxy.py +++ b/src/together/_utils/_proxy.py @@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]: @property # type: ignore @override def __class__(self) -> type: # pyright: ignore - proxied = self.__get_proxied__() + try: + proxied = self.__get_proxied__() + except Exception: + return type(self) if issubclass(type(proxied), LazyProxy): return type(proxied) return proxied.__class__ diff --git a/src/together/_utils/_resources_proxy.py b/src/together/_utils/_resources_proxy.py new file mode 100644 index 00000000..68dd27e7 --- /dev/null +++ b/src/together/_utils/_resources_proxy.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any +from typing_extensions import override + +from ._proxy import LazyProxy + + +class ResourcesProxy(LazyProxy[Any]): + """A proxy for the `together.resources` module. + + This is used so that we can lazily import `together.resources` only when + needed *and* so that users can just import `together` and reference `together.resources` + """ + + @override + def __load__(self) -> Any: + import importlib + + mod = importlib.import_module("together.resources") + return mod + + +resources = ResourcesProxy().__as_proxied__() diff --git a/src/together/_utils/_transform.py b/src/together/_utils/_transform.py index 18afd9d8..b0cc20a7 100644 --- a/src/together/_utils/_transform.py +++ b/src/together/_utils/_transform.py @@ -5,13 +5,15 @@ import pathlib from typing import Any, Mapping, TypeVar, cast from datetime import date, datetime -from typing_extensions import Literal, get_args, override, get_type_hints +from typing_extensions import Literal, get_args, override, get_type_hints as _get_type_hints import anyio import pydantic from ._utils import ( is_list, + is_given, + lru_cache, is_mapping, is_iterable, ) @@ -108,6 +110,7 @@ class Params(TypedDict, total=False): return cast(_T, transformed) +@lru_cache(maxsize=8096) def _get_annotated_type(type_: type) -> type | None: """If the given type is an `Annotated` type then it is returned, if not `None` is returned. @@ -126,7 +129,7 @@ def _get_annotated_type(type_: type) -> type | None: def _maybe_transform_key(key: str, type_: type) -> str: """Transform the given `data` based on the annotations provided in `type_`. - Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata. + Note: this function only looks at `Annotated` types that contain `PropertyInfo` metadata. """ annotated_type = _get_annotated_type(type_) if annotated_type is None: @@ -142,6 +145,10 @@ def _maybe_transform_key(key: str, type_: type) -> str: return key +def _no_transform_needed(annotation: type) -> bool: + return annotation == float or annotation == int + + def _transform_recursive( data: object, *, @@ -184,6 +191,15 @@ def _transform_recursive( return cast(object, data) inner_type = extract_type_arg(stripped_type, 0) + if _no_transform_needed(inner_type): + # for some types there is no need to transform anything, so we can get a small + # perf boost from skipping that work. + # + # but we still need to convert to a list to ensure the data is json-serializable + if is_list(data): + return data + return list(data) + return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] if is_union_type(stripped_type): @@ -245,6 +261,11 @@ def _transform_typeddict( result: dict[str, object] = {} annotations = get_type_hints(expected_type, include_extras=True) for key, value in data.items(): + if not is_given(value): + # we don't need to include `NotGiven` values here as they'll + # be stripped out before the request is sent anyway + continue + type_ = annotations.get(key) if type_ is None: # we do not have a type annotation for this field, leave it as is @@ -332,6 +353,15 @@ async def _async_transform_recursive( return cast(object, data) inner_type = extract_type_arg(stripped_type, 0) + if _no_transform_needed(inner_type): + # for some types there is no need to transform anything, so we can get a small + # perf boost from skipping that work. + # + # but we still need to convert to a list to ensure the data is json-serializable + if is_list(data): + return data + return list(data) + return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] if is_union_type(stripped_type): @@ -393,6 +423,11 @@ async def _async_transform_typeddict( result: dict[str, object] = {} annotations = get_type_hints(expected_type, include_extras=True) for key, value in data.items(): + if not is_given(value): + # we don't need to include `NotGiven` values here as they'll + # be stripped out before the request is sent anyway + continue + type_ = annotations.get(key) if type_ is None: # we do not have a type annotation for this field, leave it as is @@ -400,3 +435,13 @@ async def _async_transform_typeddict( else: result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_) return result + + +@lru_cache(maxsize=8096) +def get_type_hints( + obj: Any, + globalns: dict[str, Any] | None = None, + localns: Mapping[str, Any] | None = None, + include_extras: bool = False, +) -> dict[str, Any]: + return _get_type_hints(obj, globalns=globalns, localns=localns, include_extras=include_extras) diff --git a/src/together/_utils/_typing.py b/src/together/_utils/_typing.py index 278749b1..1bac9542 100644 --- a/src/together/_utils/_typing.py +++ b/src/together/_utils/_typing.py @@ -13,6 +13,7 @@ get_origin, ) +from ._utils import lru_cache from .._types import InheritsGeneric from .._compat import is_union as _is_union @@ -66,6 +67,7 @@ def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]: # Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] +@lru_cache(maxsize=8096) def strip_annotated_type(typ: type) -> type: if is_required_type(typ) or is_annotated_type(typ): return strip_annotated_type(cast(type, get_args(typ)[0])) @@ -108,7 +110,7 @@ class MyResponse(Foo[_T]): ``` """ cls = cast(object, get_origin(typ) or typ) - if cls in generic_bases: + if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains] # we're given the class directly return extract_type_arg(typ, index) diff --git a/src/together/_utils/_utils.py b/src/together/_utils/_utils.py index e5811bba..ea3cf3f2 100644 --- a/src/together/_utils/_utils.py +++ b/src/together/_utils/_utils.py @@ -72,8 +72,16 @@ def _extract_items( from .._files import assert_is_file_content # We have exhausted the path, return the entry we found. - assert_is_file_content(obj, key=flattened_key) assert flattened_key is not None + + if is_list(obj): + files: list[tuple[str, FileTypes]] = [] + for entry in obj: + assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "") + files.append((flattened_key + "[]", cast(FileTypes, entry))) + return files + + assert_is_file_content(obj, key=flattened_key) return [(flattened_key, cast(FileTypes, obj))] index += 1 diff --git a/src/together/_version.py b/src/together/_version.py index 149ef267..2b3d6409 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "together" -__version__ = "0.1.0-alpha.3" # x-release-please-version +__version__ = "0.1.0-alpha.4" # x-release-please-version diff --git a/src/together/resources/__init__.py b/src/together/resources/__init__.py index 14f5ae11..bd3e4c51 100644 --- a/src/together/resources/__init__.py +++ b/src/together/resources/__init__.py @@ -8,6 +8,14 @@ ChatResourceWithStreamingResponse, AsyncChatResourceWithStreamingResponse, ) +from .jobs import ( + JobsResource, + AsyncJobsResource, + JobsResourceWithRawResponse, + AsyncJobsResourceWithRawResponse, + JobsResourceWithStreamingResponse, + AsyncJobsResourceWithStreamingResponse, +) from .audio import ( AudioResource, AsyncAudioResource, @@ -40,6 +48,22 @@ ModelsResourceWithStreamingResponse, AsyncModelsResourceWithStreamingResponse, ) +from .hardware import ( + HardwareResource, + AsyncHardwareResource, + HardwareResourceWithRawResponse, + AsyncHardwareResourceWithRawResponse, + HardwareResourceWithStreamingResponse, + AsyncHardwareResourceWithStreamingResponse, +) +from .endpoints import ( + EndpointsResource, + AsyncEndpointsResource, + EndpointsResourceWithRawResponse, + AsyncEndpointsResourceWithRawResponse, + EndpointsResourceWithStreamingResponse, + AsyncEndpointsResourceWithStreamingResponse, +) from .fine_tune import ( FineTuneResource, AsyncFineTuneResource, @@ -64,6 +88,14 @@ CompletionsResourceWithStreamingResponse, AsyncCompletionsResourceWithStreamingResponse, ) +from .code_interpreter import ( + CodeInterpreterResource, + AsyncCodeInterpreterResource, + CodeInterpreterResourceWithRawResponse, + AsyncCodeInterpreterResourceWithRawResponse, + CodeInterpreterResourceWithStreamingResponse, + AsyncCodeInterpreterResourceWithStreamingResponse, +) __all__ = [ "ChatResource", @@ -96,6 +128,12 @@ "AsyncFineTuneResourceWithRawResponse", "FineTuneResourceWithStreamingResponse", "AsyncFineTuneResourceWithStreamingResponse", + "CodeInterpreterResource", + "AsyncCodeInterpreterResource", + "CodeInterpreterResourceWithRawResponse", + "AsyncCodeInterpreterResourceWithRawResponse", + "CodeInterpreterResourceWithStreamingResponse", + "AsyncCodeInterpreterResourceWithStreamingResponse", "ImagesResource", "AsyncImagesResource", "ImagesResourceWithRawResponse", @@ -114,4 +152,22 @@ "AsyncModelsResourceWithRawResponse", "ModelsResourceWithStreamingResponse", "AsyncModelsResourceWithStreamingResponse", + "JobsResource", + "AsyncJobsResource", + "JobsResourceWithRawResponse", + "AsyncJobsResourceWithRawResponse", + "JobsResourceWithStreamingResponse", + "AsyncJobsResourceWithStreamingResponse", + "EndpointsResource", + "AsyncEndpointsResource", + "EndpointsResourceWithRawResponse", + "AsyncEndpointsResourceWithRawResponse", + "EndpointsResourceWithStreamingResponse", + "AsyncEndpointsResourceWithStreamingResponse", + "HardwareResource", + "AsyncHardwareResource", + "HardwareResourceWithRawResponse", + "AsyncHardwareResourceWithRawResponse", + "HardwareResourceWithStreamingResponse", + "AsyncHardwareResourceWithStreamingResponse", ] diff --git a/src/together/resources/audio.py b/src/together/resources/audio.py index c21fc754..7a8b2455 100644 --- a/src/together/resources/audio.py +++ b/src/together/resources/audio.py @@ -9,10 +9,7 @@ from ..types import audio_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index be27ee86..9b9b413a 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -8,11 +8,7 @@ import httpx from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - required_args, - maybe_transform, - async_maybe_transform, -) +from ..._utils import required_args, maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -515,7 +511,9 @@ def create( "top_k": top_k, "top_p": top_p, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -1010,7 +1008,9 @@ async def create( "top_k": top_k, "top_p": top_p, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/together/resources/code_interpreter/__init__.py b/src/together/resources/code_interpreter/__init__.py new file mode 100644 index 00000000..d5e30afe --- /dev/null +++ b/src/together/resources/code_interpreter/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .sessions import ( + SessionsResource, + AsyncSessionsResource, + SessionsResourceWithRawResponse, + AsyncSessionsResourceWithRawResponse, + SessionsResourceWithStreamingResponse, + AsyncSessionsResourceWithStreamingResponse, +) +from .code_interpreter import ( + CodeInterpreterResource, + AsyncCodeInterpreterResource, + CodeInterpreterResourceWithRawResponse, + AsyncCodeInterpreterResourceWithRawResponse, + CodeInterpreterResourceWithStreamingResponse, + AsyncCodeInterpreterResourceWithStreamingResponse, +) + +__all__ = [ + "SessionsResource", + "AsyncSessionsResource", + "SessionsResourceWithRawResponse", + "AsyncSessionsResourceWithRawResponse", + "SessionsResourceWithStreamingResponse", + "AsyncSessionsResourceWithStreamingResponse", + "CodeInterpreterResource", + "AsyncCodeInterpreterResource", + "CodeInterpreterResourceWithRawResponse", + "AsyncCodeInterpreterResourceWithRawResponse", + "CodeInterpreterResourceWithStreamingResponse", + "AsyncCodeInterpreterResourceWithStreamingResponse", +] diff --git a/src/together/resources/code_interpreter/code_interpreter.py b/src/together/resources/code_interpreter/code_interpreter.py new file mode 100644 index 00000000..8a9b777d --- /dev/null +++ b/src/together/resources/code_interpreter/code_interpreter.py @@ -0,0 +1,258 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, Iterable, cast +from typing_extensions import Literal + +import httpx + +from ...types import code_interpreter_execute_params +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform, async_maybe_transform +from .sessions import ( + SessionsResource, + AsyncSessionsResource, + SessionsResourceWithRawResponse, + AsyncSessionsResourceWithRawResponse, + SessionsResourceWithStreamingResponse, + AsyncSessionsResourceWithStreamingResponse, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.execute_response import ExecuteResponse + +__all__ = ["CodeInterpreterResource", "AsyncCodeInterpreterResource"] + + +class CodeInterpreterResource(SyncAPIResource): + @cached_property + def sessions(self) -> SessionsResource: + return SessionsResource(self._client) + + @cached_property + def with_raw_response(self) -> CodeInterpreterResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return CodeInterpreterResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CodeInterpreterResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return CodeInterpreterResourceWithStreamingResponse(self) + + def execute( + self, + *, + code: str, + language: Literal["python"], + files: Iterable[code_interpreter_execute_params.File] | NotGiven = NOT_GIVEN, + session_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ExecuteResponse: + """Executes the given code snippet and returns the output. + + Without a session_id, a + new session will be created to run the code. If you do pass in a valid + session_id, the code will be run in that session. This is useful for running + multiple code snippets in the same environment, because dependencies and similar + things are persisted between calls to the same session. + + Args: + code: Code snippet to execute. + + language: Programming language for the code to execute. Currently only supports Python, + but more will be added. + + files: Files to upload to the session. If present, files will be uploaded before + executing the given code. + + session_id: Identifier of the current session. Used to make follow-up calls. Requests will + return an error if the session does not belong to the caller or has expired. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return cast( + ExecuteResponse, + self._post( + "/tci/execute", + body=maybe_transform( + { + "code": code, + "language": language, + "files": files, + "session_id": session_id, + }, + code_interpreter_execute_params.CodeInterpreterExecuteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast(Any, ExecuteResponse), # Union types cannot be passed in as arguments in the type system + ), + ) + + +class AsyncCodeInterpreterResource(AsyncAPIResource): + @cached_property + def sessions(self) -> AsyncSessionsResource: + return AsyncSessionsResource(self._client) + + @cached_property + def with_raw_response(self) -> AsyncCodeInterpreterResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return AsyncCodeInterpreterResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCodeInterpreterResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return AsyncCodeInterpreterResourceWithStreamingResponse(self) + + async def execute( + self, + *, + code: str, + language: Literal["python"], + files: Iterable[code_interpreter_execute_params.File] | NotGiven = NOT_GIVEN, + session_id: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ExecuteResponse: + """Executes the given code snippet and returns the output. + + Without a session_id, a + new session will be created to run the code. If you do pass in a valid + session_id, the code will be run in that session. This is useful for running + multiple code snippets in the same environment, because dependencies and similar + things are persisted between calls to the same session. + + Args: + code: Code snippet to execute. + + language: Programming language for the code to execute. Currently only supports Python, + but more will be added. + + files: Files to upload to the session. If present, files will be uploaded before + executing the given code. + + session_id: Identifier of the current session. Used to make follow-up calls. Requests will + return an error if the session does not belong to the caller or has expired. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return cast( + ExecuteResponse, + await self._post( + "/tci/execute", + body=await async_maybe_transform( + { + "code": code, + "language": language, + "files": files, + "session_id": session_id, + }, + code_interpreter_execute_params.CodeInterpreterExecuteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast(Any, ExecuteResponse), # Union types cannot be passed in as arguments in the type system + ), + ) + + +class CodeInterpreterResourceWithRawResponse: + def __init__(self, code_interpreter: CodeInterpreterResource) -> None: + self._code_interpreter = code_interpreter + + self.execute = to_raw_response_wrapper( + code_interpreter.execute, + ) + + @cached_property + def sessions(self) -> SessionsResourceWithRawResponse: + return SessionsResourceWithRawResponse(self._code_interpreter.sessions) + + +class AsyncCodeInterpreterResourceWithRawResponse: + def __init__(self, code_interpreter: AsyncCodeInterpreterResource) -> None: + self._code_interpreter = code_interpreter + + self.execute = async_to_raw_response_wrapper( + code_interpreter.execute, + ) + + @cached_property + def sessions(self) -> AsyncSessionsResourceWithRawResponse: + return AsyncSessionsResourceWithRawResponse(self._code_interpreter.sessions) + + +class CodeInterpreterResourceWithStreamingResponse: + def __init__(self, code_interpreter: CodeInterpreterResource) -> None: + self._code_interpreter = code_interpreter + + self.execute = to_streamed_response_wrapper( + code_interpreter.execute, + ) + + @cached_property + def sessions(self) -> SessionsResourceWithStreamingResponse: + return SessionsResourceWithStreamingResponse(self._code_interpreter.sessions) + + +class AsyncCodeInterpreterResourceWithStreamingResponse: + def __init__(self, code_interpreter: AsyncCodeInterpreterResource) -> None: + self._code_interpreter = code_interpreter + + self.execute = async_to_streamed_response_wrapper( + code_interpreter.execute, + ) + + @cached_property + def sessions(self) -> AsyncSessionsResourceWithStreamingResponse: + return AsyncSessionsResourceWithStreamingResponse(self._code_interpreter.sessions) diff --git a/src/together/resources/code_interpreter/sessions.py b/src/together/resources/code_interpreter/sessions.py new file mode 100644 index 00000000..c4f3a8b0 --- /dev/null +++ b/src/together/resources/code_interpreter/sessions.py @@ -0,0 +1,135 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.code_interpreter.session_list_response import SessionListResponse + +__all__ = ["SessionsResource", "AsyncSessionsResource"] + + +class SessionsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SessionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return SessionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SessionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return SessionsResourceWithStreamingResponse(self) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionListResponse: + """Lists all your currently active sessions.""" + return self._get( + "/tci/sessions", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionListResponse, + ) + + +class AsyncSessionsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSessionsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return AsyncSessionsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSessionsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return AsyncSessionsResourceWithStreamingResponse(self) + + async def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionListResponse: + """Lists all your currently active sessions.""" + return await self._get( + "/tci/sessions", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionListResponse, + ) + + +class SessionsResourceWithRawResponse: + def __init__(self, sessions: SessionsResource) -> None: + self._sessions = sessions + + self.list = to_raw_response_wrapper( + sessions.list, + ) + + +class AsyncSessionsResourceWithRawResponse: + def __init__(self, sessions: AsyncSessionsResource) -> None: + self._sessions = sessions + + self.list = async_to_raw_response_wrapper( + sessions.list, + ) + + +class SessionsResourceWithStreamingResponse: + def __init__(self, sessions: SessionsResource) -> None: + self._sessions = sessions + + self.list = to_streamed_response_wrapper( + sessions.list, + ) + + +class AsyncSessionsResourceWithStreamingResponse: + def __init__(self, sessions: AsyncSessionsResource) -> None: + self._sessions = sessions + + self.list = async_to_streamed_response_wrapper( + sessions.list, + ) diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index 9f1a0ba0..d201dcd2 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -9,11 +9,7 @@ from ..types import completion_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - required_args, - maybe_transform, - async_maybe_transform, -) +from .._utils import required_args, maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -442,7 +438,9 @@ def create( "top_k": top_k, "top_p": top_p, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -866,7 +864,9 @@ async def create( "top_k": top_k, "top_p": top_p, }, - completion_create_params.CompletionCreateParams, + completion_create_params.CompletionCreateParamsStreaming + if stream + else completion_create_params.CompletionCreateParamsNonStreaming, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py index 936cfbc1..36688cb3 100644 --- a/src/together/resources/embeddings.py +++ b/src/together/resources/embeddings.py @@ -9,10 +9,7 @@ from ..types import embedding_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py new file mode 100644 index 00000000..933c1fc5 --- /dev/null +++ b/src/together/resources/endpoints.py @@ -0,0 +1,627 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal + +import httpx + +from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params +from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from .._utils import maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.endpoint_list_response import EndpointListResponse +from ..types.endpoint_create_response import EndpointCreateResponse +from ..types.endpoint_update_response import EndpointUpdateResponse +from ..types.endpoint_retrieve_response import EndpointRetrieveResponse + +__all__ = ["EndpointsResource", "AsyncEndpointsResource"] + + +class EndpointsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> EndpointsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return EndpointsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return EndpointsResourceWithStreamingResponse(self) + + def create( + self, + *, + autoscaling: endpoint_create_params.Autoscaling, + hardware: str, + model: str, + disable_prompt_cache: bool | NotGiven = NOT_GIVEN, + disable_speculative_decoding: bool | NotGiven = NOT_GIVEN, + display_name: str | NotGiven = NOT_GIVEN, + inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN, + state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointCreateResponse: + """Creates a new dedicated endpoint for serving models. + + The endpoint will + automatically start after creation. You can deploy any supported model on + hardware configurations that meet the model's requirements. + + Args: + autoscaling: Configuration for automatic scaling of the endpoint + + hardware: The hardware configuration to use for this endpoint + + model: The model to deploy on this endpoint + + disable_prompt_cache: Whether to disable the prompt cache for this endpoint + + disable_speculative_decoding: Whether to disable speculative decoding for this endpoint + + display_name: A human-readable name for the endpoint + + inactive_timeout: The number of minutes of inactivity after which the endpoint will be + automatically stopped. Set to null, omit or set to 0 to disable automatic + timeout. + + state: The desired state of the endpoint + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/endpoints", + body=maybe_transform( + { + "autoscaling": autoscaling, + "hardware": hardware, + "model": model, + "disable_prompt_cache": disable_prompt_cache, + "disable_speculative_decoding": disable_speculative_decoding, + "display_name": display_name, + "inactive_timeout": inactive_timeout, + "state": state, + }, + endpoint_create_params.EndpointCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EndpointCreateResponse, + ) + + def retrieve( + self, + endpoint_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointRetrieveResponse: + """ + Retrieves details about a specific endpoint, including its current state, + configuration, and scaling settings. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not endpoint_id: + raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}") + return self._get( + f"/endpoints/{endpoint_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EndpointRetrieveResponse, + ) + + def update( + self, + endpoint_id: str, + *, + autoscaling: endpoint_update_params.Autoscaling | NotGiven = NOT_GIVEN, + display_name: str | NotGiven = NOT_GIVEN, + inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN, + state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointUpdateResponse: + """Updates an existing endpoint's configuration. + + You can modify the display name, + autoscaling settings, or change the endpoint's state (start/stop). + + Args: + autoscaling: New autoscaling configuration for the endpoint + + display_name: A human-readable name for the endpoint + + inactive_timeout: The number of minutes of inactivity after which the endpoint will be + automatically stopped. Set to 0 to disable automatic timeout. + + state: The desired state of the endpoint + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not endpoint_id: + raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}") + return self._patch( + f"/endpoints/{endpoint_id}", + body=maybe_transform( + { + "autoscaling": autoscaling, + "display_name": display_name, + "inactive_timeout": inactive_timeout, + "state": state, + }, + endpoint_update_params.EndpointUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EndpointUpdateResponse, + ) + + def list( + self, + *, + type: Literal["dedicated", "serverless"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointListResponse: + """Returns a list of all endpoints associated with your account. + + You can filter the + results by type (dedicated or serverless). + + Args: + type: Filter endpoints by type + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/endpoints", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"type": type}, endpoint_list_params.EndpointListParams), + ), + cast_to=EndpointListResponse, + ) + + def delete( + self, + endpoint_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """Permanently deletes an endpoint. + + This action cannot be undone. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not endpoint_id: + raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/endpoints/{endpoint_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncEndpointsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return AsyncEndpointsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return AsyncEndpointsResourceWithStreamingResponse(self) + + async def create( + self, + *, + autoscaling: endpoint_create_params.Autoscaling, + hardware: str, + model: str, + disable_prompt_cache: bool | NotGiven = NOT_GIVEN, + disable_speculative_decoding: bool | NotGiven = NOT_GIVEN, + display_name: str | NotGiven = NOT_GIVEN, + inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN, + state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointCreateResponse: + """Creates a new dedicated endpoint for serving models. + + The endpoint will + automatically start after creation. You can deploy any supported model on + hardware configurations that meet the model's requirements. + + Args: + autoscaling: Configuration for automatic scaling of the endpoint + + hardware: The hardware configuration to use for this endpoint + + model: The model to deploy on this endpoint + + disable_prompt_cache: Whether to disable the prompt cache for this endpoint + + disable_speculative_decoding: Whether to disable speculative decoding for this endpoint + + display_name: A human-readable name for the endpoint + + inactive_timeout: The number of minutes of inactivity after which the endpoint will be + automatically stopped. Set to null, omit or set to 0 to disable automatic + timeout. + + state: The desired state of the endpoint + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/endpoints", + body=await async_maybe_transform( + { + "autoscaling": autoscaling, + "hardware": hardware, + "model": model, + "disable_prompt_cache": disable_prompt_cache, + "disable_speculative_decoding": disable_speculative_decoding, + "display_name": display_name, + "inactive_timeout": inactive_timeout, + "state": state, + }, + endpoint_create_params.EndpointCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EndpointCreateResponse, + ) + + async def retrieve( + self, + endpoint_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointRetrieveResponse: + """ + Retrieves details about a specific endpoint, including its current state, + configuration, and scaling settings. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not endpoint_id: + raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}") + return await self._get( + f"/endpoints/{endpoint_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EndpointRetrieveResponse, + ) + + async def update( + self, + endpoint_id: str, + *, + autoscaling: endpoint_update_params.Autoscaling | NotGiven = NOT_GIVEN, + display_name: str | NotGiven = NOT_GIVEN, + inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN, + state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointUpdateResponse: + """Updates an existing endpoint's configuration. + + You can modify the display name, + autoscaling settings, or change the endpoint's state (start/stop). + + Args: + autoscaling: New autoscaling configuration for the endpoint + + display_name: A human-readable name for the endpoint + + inactive_timeout: The number of minutes of inactivity after which the endpoint will be + automatically stopped. Set to 0 to disable automatic timeout. + + state: The desired state of the endpoint + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not endpoint_id: + raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}") + return await self._patch( + f"/endpoints/{endpoint_id}", + body=await async_maybe_transform( + { + "autoscaling": autoscaling, + "display_name": display_name, + "inactive_timeout": inactive_timeout, + "state": state, + }, + endpoint_update_params.EndpointUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=EndpointUpdateResponse, + ) + + async def list( + self, + *, + type: Literal["dedicated", "serverless"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> EndpointListResponse: + """Returns a list of all endpoints associated with your account. + + You can filter the + results by type (dedicated or serverless). + + Args: + type: Filter endpoints by type + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._get( + "/endpoints", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"type": type}, endpoint_list_params.EndpointListParams), + ), + cast_to=EndpointListResponse, + ) + + async def delete( + self, + endpoint_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """Permanently deletes an endpoint. + + This action cannot be undone. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not endpoint_id: + raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/endpoints/{endpoint_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class EndpointsResourceWithRawResponse: + def __init__(self, endpoints: EndpointsResource) -> None: + self._endpoints = endpoints + + self.create = to_raw_response_wrapper( + endpoints.create, + ) + self.retrieve = to_raw_response_wrapper( + endpoints.retrieve, + ) + self.update = to_raw_response_wrapper( + endpoints.update, + ) + self.list = to_raw_response_wrapper( + endpoints.list, + ) + self.delete = to_raw_response_wrapper( + endpoints.delete, + ) + + +class AsyncEndpointsResourceWithRawResponse: + def __init__(self, endpoints: AsyncEndpointsResource) -> None: + self._endpoints = endpoints + + self.create = async_to_raw_response_wrapper( + endpoints.create, + ) + self.retrieve = async_to_raw_response_wrapper( + endpoints.retrieve, + ) + self.update = async_to_raw_response_wrapper( + endpoints.update, + ) + self.list = async_to_raw_response_wrapper( + endpoints.list, + ) + self.delete = async_to_raw_response_wrapper( + endpoints.delete, + ) + + +class EndpointsResourceWithStreamingResponse: + def __init__(self, endpoints: EndpointsResource) -> None: + self._endpoints = endpoints + + self.create = to_streamed_response_wrapper( + endpoints.create, + ) + self.retrieve = to_streamed_response_wrapper( + endpoints.retrieve, + ) + self.update = to_streamed_response_wrapper( + endpoints.update, + ) + self.list = to_streamed_response_wrapper( + endpoints.list, + ) + self.delete = to_streamed_response_wrapper( + endpoints.delete, + ) + + +class AsyncEndpointsResourceWithStreamingResponse: + def __init__(self, endpoints: AsyncEndpointsResource) -> None: + self._endpoints = endpoints + + self.create = async_to_streamed_response_wrapper( + endpoints.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + endpoints.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + endpoints.update, + ) + self.list = async_to_streamed_response_wrapper( + endpoints.list, + ) + self.delete = async_to_streamed_response_wrapper( + endpoints.delete, + ) diff --git a/src/together/resources/fine_tune.py b/src/together/resources/fine_tune.py index b3b62585..84619f78 100644 --- a/src/together/resources/fine_tune.py +++ b/src/together/resources/fine_tune.py @@ -9,10 +9,7 @@ from ..types import fine_tune_create_params, fine_tune_download_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -55,7 +52,8 @@ def create( *, model: str, training_file: str, - batch_size: int | NotGiven = NOT_GIVEN, + batch_size: Union[int, Literal["max"]] | NotGiven = NOT_GIVEN, + from_checkpoint: str | NotGiven = NOT_GIVEN, learning_rate: float | NotGiven = NOT_GIVEN, lr_scheduler: fine_tune_create_params.LrScheduler | NotGiven = NOT_GIVEN, max_grad_norm: float | NotGiven = NOT_GIVEN, @@ -64,6 +62,7 @@ def create( n_evals: int | NotGiven = NOT_GIVEN, suffix: str | NotGiven = NOT_GIVEN, train_on_inputs: Union[bool, Literal["auto"]] | NotGiven = NOT_GIVEN, + training_method: fine_tune_create_params.TrainingMethod | NotGiven = NOT_GIVEN, training_type: fine_tune_create_params.TrainingType | NotGiven = NOT_GIVEN, validation_file: str | NotGiven = NOT_GIVEN, wandb_api_key: str | NotGiven = NOT_GIVEN, @@ -88,11 +87,20 @@ def create( training_file: File-ID of a training file uploaded to the Together API batch_size: Number of training examples processed together (larger batches use more memory - but may train faster) + but may train faster). Defaults to "max". We use training optimizations like + packing, so the effective batch size may be different than the value you set. + + from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job. + Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the + final checkpoint will be used. learning_rate: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence) + lr_scheduler: The learning rate scheduler to use. It specifies how the learning rate is + adjusted during training. + max_grad_norm: Max gradient norm to be used for gradient clipping. Set to 0 to disable. n_checkpoints: Number of intermediate model versions saved during training for evaluation @@ -107,6 +115,9 @@ def create( train_on_inputs: Whether to mask the user messages in conversational data or prompts in instruction data. + training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct + Preference Optimization. + validation_file: File-ID of a validation file uploaded to the Together API wandb_api_key: Integration key for tracking experiments and model metrics on W&B platform @@ -121,7 +132,7 @@ def create( warmup_ratio: The percent of steps at the start of training to linearly increase the learning rate. - weight_decay: Weight decay + weight_decay: Weight decay. Regularization parameter for the optimizer. extra_headers: Send extra headers @@ -138,6 +149,7 @@ def create( "model": model, "training_file": training_file, "batch_size": batch_size, + "from_checkpoint": from_checkpoint, "learning_rate": learning_rate, "lr_scheduler": lr_scheduler, "max_grad_norm": max_grad_norm, @@ -146,6 +158,7 @@ def create( "n_evals": n_evals, "suffix": suffix, "train_on_inputs": train_on_inputs, + "training_method": training_method, "training_type": training_type, "validation_file": validation_file, "wandb_api_key": wandb_api_key, @@ -364,7 +377,8 @@ async def create( *, model: str, training_file: str, - batch_size: int | NotGiven = NOT_GIVEN, + batch_size: Union[int, Literal["max"]] | NotGiven = NOT_GIVEN, + from_checkpoint: str | NotGiven = NOT_GIVEN, learning_rate: float | NotGiven = NOT_GIVEN, lr_scheduler: fine_tune_create_params.LrScheduler | NotGiven = NOT_GIVEN, max_grad_norm: float | NotGiven = NOT_GIVEN, @@ -373,6 +387,7 @@ async def create( n_evals: int | NotGiven = NOT_GIVEN, suffix: str | NotGiven = NOT_GIVEN, train_on_inputs: Union[bool, Literal["auto"]] | NotGiven = NOT_GIVEN, + training_method: fine_tune_create_params.TrainingMethod | NotGiven = NOT_GIVEN, training_type: fine_tune_create_params.TrainingType | NotGiven = NOT_GIVEN, validation_file: str | NotGiven = NOT_GIVEN, wandb_api_key: str | NotGiven = NOT_GIVEN, @@ -397,11 +412,20 @@ async def create( training_file: File-ID of a training file uploaded to the Together API batch_size: Number of training examples processed together (larger batches use more memory - but may train faster) + but may train faster). Defaults to "max". We use training optimizations like + packing, so the effective batch size may be different than the value you set. + + from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job. + Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the + final checkpoint will be used. learning_rate: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence) + lr_scheduler: The learning rate scheduler to use. It specifies how the learning rate is + adjusted during training. + max_grad_norm: Max gradient norm to be used for gradient clipping. Set to 0 to disable. n_checkpoints: Number of intermediate model versions saved during training for evaluation @@ -416,6 +440,9 @@ async def create( train_on_inputs: Whether to mask the user messages in conversational data or prompts in instruction data. + training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct + Preference Optimization. + validation_file: File-ID of a validation file uploaded to the Together API wandb_api_key: Integration key for tracking experiments and model metrics on W&B platform @@ -430,7 +457,7 @@ async def create( warmup_ratio: The percent of steps at the start of training to linearly increase the learning rate. - weight_decay: Weight decay + weight_decay: Weight decay. Regularization parameter for the optimizer. extra_headers: Send extra headers @@ -447,6 +474,7 @@ async def create( "model": model, "training_file": training_file, "batch_size": batch_size, + "from_checkpoint": from_checkpoint, "learning_rate": learning_rate, "lr_scheduler": lr_scheduler, "max_grad_norm": max_grad_norm, @@ -455,6 +483,7 @@ async def create( "n_evals": n_evals, "suffix": suffix, "train_on_inputs": train_on_inputs, + "training_method": training_method, "training_type": training_type, "validation_file": validation_file, "wandb_api_key": wandb_api_key, diff --git a/src/together/resources/hardware.py b/src/together/resources/hardware.py new file mode 100644 index 00000000..302e6303 --- /dev/null +++ b/src/together/resources/hardware.py @@ -0,0 +1,181 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from ..types import hardware_list_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.hardware_list_response import HardwareListResponse + +__all__ = ["HardwareResource", "AsyncHardwareResource"] + + +class HardwareResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> HardwareResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return HardwareResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> HardwareResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return HardwareResourceWithStreamingResponse(self) + + def list( + self, + *, + model: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> HardwareListResponse: + """Returns a list of available hardware configurations for deploying models. + + When a + model parameter is provided, it returns only hardware configurations compatible + with that model, including their current availability status. + + Args: + model: Filter hardware configurations by model compatibility. When provided, the + response includes availability status for each compatible configuration. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/hardware", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"model": model}, hardware_list_params.HardwareListParams), + ), + cast_to=HardwareListResponse, + ) + + +class AsyncHardwareResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncHardwareResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return AsyncHardwareResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncHardwareResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return AsyncHardwareResourceWithStreamingResponse(self) + + async def list( + self, + *, + model: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> HardwareListResponse: + """Returns a list of available hardware configurations for deploying models. + + When a + model parameter is provided, it returns only hardware configurations compatible + with that model, including their current availability status. + + Args: + model: Filter hardware configurations by model compatibility. When provided, the + response includes availability status for each compatible configuration. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._get( + "/hardware", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"model": model}, hardware_list_params.HardwareListParams), + ), + cast_to=HardwareListResponse, + ) + + +class HardwareResourceWithRawResponse: + def __init__(self, hardware: HardwareResource) -> None: + self._hardware = hardware + + self.list = to_raw_response_wrapper( + hardware.list, + ) + + +class AsyncHardwareResourceWithRawResponse: + def __init__(self, hardware: AsyncHardwareResource) -> None: + self._hardware = hardware + + self.list = async_to_raw_response_wrapper( + hardware.list, + ) + + +class HardwareResourceWithStreamingResponse: + def __init__(self, hardware: HardwareResource) -> None: + self._hardware = hardware + + self.list = to_streamed_response_wrapper( + hardware.list, + ) + + +class AsyncHardwareResourceWithStreamingResponse: + def __init__(self, hardware: AsyncHardwareResource) -> None: + self._hardware = hardware + + self.list = async_to_streamed_response_wrapper( + hardware.list, + ) diff --git a/src/together/resources/images.py b/src/together/resources/images.py index c92542df..30526e1d 100644 --- a/src/together/resources/images.py +++ b/src/together/resources/images.py @@ -9,10 +9,7 @@ from ..types import image_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import ( - maybe_transform, - async_maybe_transform, -) +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( diff --git a/src/together/resources/jobs.py b/src/together/resources/jobs.py new file mode 100644 index 00000000..2eae45ab --- /dev/null +++ b/src/together/resources/jobs.py @@ -0,0 +1,214 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.job_list_response import JobListResponse +from ..types.job_retrieve_response import JobRetrieveResponse + +__all__ = ["JobsResource", "AsyncJobsResource"] + + +class JobsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> JobsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return JobsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> JobsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return JobsResourceWithStreamingResponse(self) + + def retrieve( + self, + job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> JobRetrieveResponse: + """ + Get the status of a specific job + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not job_id: + raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") + return self._get( + f"/jobs/{job_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=JobRetrieveResponse, + ) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> JobListResponse: + """List all jobs and their statuses""" + return self._get( + "/jobs", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=JobListResponse, + ) + + +class AsyncJobsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncJobsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers + """ + return AsyncJobsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response + """ + return AsyncJobsResourceWithStreamingResponse(self) + + async def retrieve( + self, + job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> JobRetrieveResponse: + """ + Get the status of a specific job + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not job_id: + raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") + return await self._get( + f"/jobs/{job_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=JobRetrieveResponse, + ) + + async def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> JobListResponse: + """List all jobs and their statuses""" + return await self._get( + "/jobs", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=JobListResponse, + ) + + +class JobsResourceWithRawResponse: + def __init__(self, jobs: JobsResource) -> None: + self._jobs = jobs + + self.retrieve = to_raw_response_wrapper( + jobs.retrieve, + ) + self.list = to_raw_response_wrapper( + jobs.list, + ) + + +class AsyncJobsResourceWithRawResponse: + def __init__(self, jobs: AsyncJobsResource) -> None: + self._jobs = jobs + + self.retrieve = async_to_raw_response_wrapper( + jobs.retrieve, + ) + self.list = async_to_raw_response_wrapper( + jobs.list, + ) + + +class JobsResourceWithStreamingResponse: + def __init__(self, jobs: JobsResource) -> None: + self._jobs = jobs + + self.retrieve = to_streamed_response_wrapper( + jobs.retrieve, + ) + self.list = to_streamed_response_wrapper( + jobs.list, + ) + + +class AsyncJobsResourceWithStreamingResponse: + def __init__(self, jobs: AsyncJobsResource) -> None: + self._jobs = jobs + + self.retrieve = async_to_streamed_response_wrapper( + jobs.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + jobs.list, + ) diff --git a/src/together/resources/models.py b/src/together/resources/models.py index 1432e2c0..b4a9cc73 100644 --- a/src/together/resources/models.py +++ b/src/together/resources/models.py @@ -2,9 +2,13 @@ from __future__ import annotations +from typing_extensions import Literal + import httpx +from ..types import model_upload_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -15,6 +19,7 @@ ) from .._base_client import make_request_options from ..types.model_list_response import ModelListResponse +from ..types.model_upload_response import ModelUploadResponse __all__ = ["ModelsResource", "AsyncModelsResource"] @@ -58,6 +63,71 @@ def list( cast_to=ModelListResponse, ) + def upload( + self, + *, + model_name: str, + model_source: str, + base_model: str | NotGiven = NOT_GIVEN, + description: str | NotGiven = NOT_GIVEN, + hf_token: str | NotGiven = NOT_GIVEN, + lora_model: str | NotGiven = NOT_GIVEN, + model_type: Literal["model", "adapter"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelUploadResponse: + """ + Upload a custom model or adapter from Hugging Face or S3 + + Args: + model_name: The name to give to your uploaded model + + model_source: The source location of the model (Hugging Face repo or S3 path) + + base_model: The base model to use for an adapter if setting it to run against a serverless + pool. Only used for model_type `adapter`. + + description: A description of your model + + hf_token: Hugging Face token (if uploading from Hugging Face) + + lora_model: The lora pool to use for an adapter if setting it to run against, say, a + dedicated pool. Only used for model_type `adapter`. + + model_type: Whether the model is a full model or an adapter + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/models", + body=maybe_transform( + { + "model_name": model_name, + "model_source": model_source, + "base_model": base_model, + "description": description, + "hf_token": hf_token, + "lora_model": lora_model, + "model_type": model_type, + }, + model_upload_params.ModelUploadParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModelUploadResponse, + ) + class AsyncModelsResource(AsyncAPIResource): @cached_property @@ -98,6 +168,71 @@ async def list( cast_to=ModelListResponse, ) + async def upload( + self, + *, + model_name: str, + model_source: str, + base_model: str | NotGiven = NOT_GIVEN, + description: str | NotGiven = NOT_GIVEN, + hf_token: str | NotGiven = NOT_GIVEN, + lora_model: str | NotGiven = NOT_GIVEN, + model_type: Literal["model", "adapter"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ModelUploadResponse: + """ + Upload a custom model or adapter from Hugging Face or S3 + + Args: + model_name: The name to give to your uploaded model + + model_source: The source location of the model (Hugging Face repo or S3 path) + + base_model: The base model to use for an adapter if setting it to run against a serverless + pool. Only used for model_type `adapter`. + + description: A description of your model + + hf_token: Hugging Face token (if uploading from Hugging Face) + + lora_model: The lora pool to use for an adapter if setting it to run against, say, a + dedicated pool. Only used for model_type `adapter`. + + model_type: Whether the model is a full model or an adapter + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/models", + body=await async_maybe_transform( + { + "model_name": model_name, + "model_source": model_source, + "base_model": base_model, + "description": description, + "hf_token": hf_token, + "lora_model": lora_model, + "model_type": model_type, + }, + model_upload_params.ModelUploadParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ModelUploadResponse, + ) + class ModelsResourceWithRawResponse: def __init__(self, models: ModelsResource) -> None: @@ -106,6 +241,9 @@ def __init__(self, models: ModelsResource) -> None: self.list = to_raw_response_wrapper( models.list, ) + self.upload = to_raw_response_wrapper( + models.upload, + ) class AsyncModelsResourceWithRawResponse: @@ -115,6 +253,9 @@ def __init__(self, models: AsyncModelsResource) -> None: self.list = async_to_raw_response_wrapper( models.list, ) + self.upload = async_to_raw_response_wrapper( + models.upload, + ) class ModelsResourceWithStreamingResponse: @@ -124,6 +265,9 @@ def __init__(self, models: ModelsResource) -> None: self.list = to_streamed_response_wrapper( models.list, ) + self.upload = to_streamed_response_wrapper( + models.upload, + ) class AsyncModelsResourceWithStreamingResponse: @@ -133,3 +277,6 @@ def __init__(self, models: AsyncModelsResource) -> None: self.list = async_to_streamed_response_wrapper( models.list, ) + self.upload = async_to_streamed_response_wrapper( + models.upload, + ) diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py index 5d067f82..d3096869 100644 --- a/src/together/types/__init__.py +++ b/src/together/types/__init__.py @@ -11,17 +11,32 @@ from .tools_param import ToolsParam as ToolsParam from .fine_tune_event import FineTuneEvent as FineTuneEvent from .rerank_response import RerankResponse as RerankResponse +from .execute_response import ExecuteResponse as ExecuteResponse +from .job_list_response import JobListResponse as JobListResponse from .tool_choice_param import ToolChoiceParam as ToolChoiceParam from .file_list_response import FileListResponse as FileListResponse from .audio_create_params import AudioCreateParams as AudioCreateParams from .image_create_params import ImageCreateParams as ImageCreateParams from .model_list_response import ModelListResponse as ModelListResponse +from .model_upload_params import ModelUploadParams as ModelUploadParams from .client_rerank_params import ClientRerankParams as ClientRerankParams +from .endpoint_list_params import EndpointListParams as EndpointListParams from .file_delete_response import FileDeleteResponse as FileDeleteResponse +from .hardware_list_params import HardwareListParams as HardwareListParams +from .job_retrieve_response import JobRetrieveResponse as JobRetrieveResponse +from .model_upload_response import ModelUploadResponse as ModelUploadResponse +from .endpoint_create_params import EndpointCreateParams as EndpointCreateParams +from .endpoint_list_response import EndpointListResponse as EndpointListResponse +from .endpoint_update_params import EndpointUpdateParams as EndpointUpdateParams from .file_retrieve_response import FileRetrieveResponse as FileRetrieveResponse +from .hardware_list_response import HardwareListResponse as HardwareListResponse from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams from .fine_tune_create_params import FineTuneCreateParams as FineTuneCreateParams from .fine_tune_list_response import FineTuneListResponse as FineTuneListResponse from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .endpoint_create_response import EndpointCreateResponse as EndpointCreateResponse +from .endpoint_update_response import EndpointUpdateResponse as EndpointUpdateResponse from .fine_tune_download_params import FineTuneDownloadParams as FineTuneDownloadParams +from .endpoint_retrieve_response import EndpointRetrieveResponse as EndpointRetrieveResponse from .fine_tune_download_response import FineTuneDownloadResponse as FineTuneDownloadResponse +from .code_interpreter_execute_params import CodeInterpreterExecuteParams as CodeInterpreterExecuteParams diff --git a/src/together/types/chat/chat_completion_structured_message_image_url_param.py b/src/together/types/chat/chat_completion_structured_message_image_url_param.py index 1a1f5bfc..25d737ca 100644 --- a/src/together/types/chat/chat_completion_structured_message_image_url_param.py +++ b/src/together/types/chat/chat_completion_structured_message_image_url_param.py @@ -9,10 +9,10 @@ class ImageURL(TypedDict, total=False): url: Required[str] - """The URL of the image as a plain string.""" + """The URL of the image""" class ChatCompletionStructuredMessageImageURLParam(TypedDict, total=False): - image_url: Required[ImageURL] + image_url: ImageURL - type: Required[Literal["image_url"]] + type: Literal["image_url"] diff --git a/src/together/types/chat/chat_completion_usage.py b/src/together/types/chat/chat_completion_usage.py index 82b9d450..510233f9 100644 --- a/src/together/types/chat/chat_completion_usage.py +++ b/src/together/types/chat/chat_completion_usage.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from ..._models import BaseModel __all__ = ["ChatCompletionUsage"] diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index d6749ac7..be7d65c0 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -14,6 +14,8 @@ "CompletionCreateParamsBase", "Message", "MessageContentUnionMember1", + "MessageContentUnionMember1Video", + "MessageContentUnionMember1VideoVideoURL", "FunctionCall", "FunctionCallName", "ResponseFormat", @@ -157,8 +159,21 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ +class MessageContentUnionMember1VideoVideoURL(TypedDict, total=False): + url: Required[str] + """The URL of the video""" + + +class MessageContentUnionMember1Video(TypedDict, total=False): + type: Required[Literal["video_url"]] + + video_url: Required[MessageContentUnionMember1VideoVideoURL] + + MessageContentUnionMember1: TypeAlias = Union[ - ChatCompletionStructuredMessageTextParam, ChatCompletionStructuredMessageImageURLParam + ChatCompletionStructuredMessageTextParam, + ChatCompletionStructuredMessageImageURLParam, + MessageContentUnionMember1Video, ] @@ -170,7 +185,10 @@ class Message(TypedDict, total=False): """ role: Required[Literal["system", "user", "assistant", "tool"]] - """The role of the messages author. Choice between: system, user, or assistant.""" + """The role of the messages author. + + Choice between: system, user, assistant, or tool. + """ class FunctionCallName(TypedDict, total=False): @@ -181,7 +199,7 @@ class FunctionCallName(TypedDict, total=False): class ResponseFormat(TypedDict, total=False): - schema: Dict[str, str] + schema: Dict[str, object] """The schema of the response format.""" type: str diff --git a/src/together/types/code_interpreter/__init__.py b/src/together/types/code_interpreter/__init__.py new file mode 100644 index 00000000..82331854 --- /dev/null +++ b/src/together/types/code_interpreter/__init__.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .session_list_response import SessionListResponse as SessionListResponse diff --git a/src/together/types/code_interpreter/session_list_response.py b/src/together/types/code_interpreter/session_list_response.py new file mode 100644 index 00000000..f4379c0d --- /dev/null +++ b/src/together/types/code_interpreter/session_list_response.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from datetime import datetime + +from ..._models import BaseModel + +__all__ = ["SessionListResponse", "Data", "DataSession"] + + +class DataSession(BaseModel): + id: str + """Session Identifier. Used to make follow-up calls.""" + + execute_count: int + + expires_at: datetime + + last_execute_at: datetime + + started_at: datetime + + +class Data(BaseModel): + sessions: List[DataSession] + + +class SessionListResponse(BaseModel): + data: Optional[Data] = None + + errors: Optional[List[Union[str, Dict[str, object]]]] = None diff --git a/src/together/types/code_interpreter_execute_params.py b/src/together/types/code_interpreter_execute_params.py new file mode 100644 index 00000000..91cf6c02 --- /dev/null +++ b/src/together/types/code_interpreter_execute_params.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["CodeInterpreterExecuteParams", "File"] + + +class CodeInterpreterExecuteParams(TypedDict, total=False): + code: Required[str] + """Code snippet to execute.""" + + language: Required[Literal["python"]] + """Programming language for the code to execute. + + Currently only supports Python, but more will be added. + """ + + files: Iterable[File] + """Files to upload to the session. + + If present, files will be uploaded before executing the given code. + """ + + session_id: str + """Identifier of the current session. + + Used to make follow-up calls. Requests will return an error if the session does + not belong to the caller or has expired. + """ + + +class File(TypedDict, total=False): + content: Required[str] + + encoding: Required[Literal["string", "base64"]] + """Encoding of the file content. + + Use `string` for text files such as code, and `base64` for binary files, such as + images. + """ + + name: Required[str] diff --git a/src/together/types/endpoint_create_params.py b/src/together/types/endpoint_create_params.py new file mode 100644 index 00000000..0e41f9cd --- /dev/null +++ b/src/together/types/endpoint_create_params.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["EndpointCreateParams", "Autoscaling"] + + +class EndpointCreateParams(TypedDict, total=False): + autoscaling: Required[Autoscaling] + """Configuration for automatic scaling of the endpoint""" + + hardware: Required[str] + """The hardware configuration to use for this endpoint""" + + model: Required[str] + """The model to deploy on this endpoint""" + + disable_prompt_cache: bool + """Whether to disable the prompt cache for this endpoint""" + + disable_speculative_decoding: bool + """Whether to disable speculative decoding for this endpoint""" + + display_name: str + """A human-readable name for the endpoint""" + + inactive_timeout: Optional[int] + """ + The number of minutes of inactivity after which the endpoint will be + automatically stopped. Set to null, omit or set to 0 to disable automatic + timeout. + """ + + state: Literal["STARTED", "STOPPED"] + """The desired state of the endpoint""" + + +class Autoscaling(TypedDict, total=False): + max_replicas: Required[int] + """The maximum number of replicas to scale up to under load""" + + min_replicas: Required[int] + """The minimum number of replicas to maintain, even when there is no load""" diff --git a/src/together/types/endpoint_create_response.py b/src/together/types/endpoint_create_response.py new file mode 100644 index 00000000..2421d2f3 --- /dev/null +++ b/src/together/types/endpoint_create_response.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from datetime import datetime +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["EndpointCreateResponse", "Autoscaling"] + + +class Autoscaling(BaseModel): + max_replicas: int + """The maximum number of replicas to scale up to under load""" + + min_replicas: int + """The minimum number of replicas to maintain, even when there is no load""" + + +class EndpointCreateResponse(BaseModel): + id: str + """Unique identifier for the endpoint""" + + autoscaling: Autoscaling + """Configuration for automatic scaling of the endpoint""" + + created_at: datetime + """Timestamp when the endpoint was created""" + + display_name: str + """Human-readable name for the endpoint""" + + hardware: str + """The hardware configuration used for this endpoint""" + + model: str + """The model deployed on this endpoint""" + + name: str + """System name for the endpoint""" + + object: Literal["endpoint"] + """The type of object""" + + owner: str + """The owner of this endpoint""" + + state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"] + """Current state of the endpoint""" + + type: Literal["dedicated"] + """The type of endpoint""" diff --git a/src/together/types/endpoint_list_params.py b/src/together/types/endpoint_list_params.py new file mode 100644 index 00000000..5123d49d --- /dev/null +++ b/src/together/types/endpoint_list_params.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["EndpointListParams"] + + +class EndpointListParams(TypedDict, total=False): + type: Literal["dedicated", "serverless"] + """Filter endpoints by type""" diff --git a/src/together/types/endpoint_list_response.py b/src/together/types/endpoint_list_response.py new file mode 100644 index 00000000..009764a7 --- /dev/null +++ b/src/together/types/endpoint_list_response.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from datetime import datetime +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["EndpointListResponse", "Data"] + + +class Data(BaseModel): + id: str + """Unique identifier for the endpoint""" + + created_at: datetime + """Timestamp when the endpoint was created""" + + model: str + """The model deployed on this endpoint""" + + name: str + """System name for the endpoint""" + + object: Literal["endpoint"] + """The type of object""" + + owner: str + """The owner of this endpoint""" + + state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"] + """Current state of the endpoint""" + + type: Literal["serverless", "dedicated"] + """The type of endpoint""" + + +class EndpointListResponse(BaseModel): + data: List[Data] + + object: Literal["list"] diff --git a/src/together/types/endpoint_retrieve_response.py b/src/together/types/endpoint_retrieve_response.py new file mode 100644 index 00000000..cb471adc --- /dev/null +++ b/src/together/types/endpoint_retrieve_response.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from datetime import datetime +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["EndpointRetrieveResponse", "Autoscaling"] + + +class Autoscaling(BaseModel): + max_replicas: int + """The maximum number of replicas to scale up to under load""" + + min_replicas: int + """The minimum number of replicas to maintain, even when there is no load""" + + +class EndpointRetrieveResponse(BaseModel): + id: str + """Unique identifier for the endpoint""" + + autoscaling: Autoscaling + """Configuration for automatic scaling of the endpoint""" + + created_at: datetime + """Timestamp when the endpoint was created""" + + display_name: str + """Human-readable name for the endpoint""" + + hardware: str + """The hardware configuration used for this endpoint""" + + model: str + """The model deployed on this endpoint""" + + name: str + """System name for the endpoint""" + + object: Literal["endpoint"] + """The type of object""" + + owner: str + """The owner of this endpoint""" + + state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"] + """Current state of the endpoint""" + + type: Literal["dedicated"] + """The type of endpoint""" diff --git a/src/together/types/endpoint_update_params.py b/src/together/types/endpoint_update_params.py new file mode 100644 index 00000000..85ec7527 --- /dev/null +++ b/src/together/types/endpoint_update_params.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["EndpointUpdateParams", "Autoscaling"] + + +class EndpointUpdateParams(TypedDict, total=False): + autoscaling: Autoscaling + """New autoscaling configuration for the endpoint""" + + display_name: str + """A human-readable name for the endpoint""" + + inactive_timeout: Optional[int] + """ + The number of minutes of inactivity after which the endpoint will be + automatically stopped. Set to 0 to disable automatic timeout. + """ + + state: Literal["STARTED", "STOPPED"] + """The desired state of the endpoint""" + + +class Autoscaling(TypedDict, total=False): + max_replicas: Required[int] + """The maximum number of replicas to scale up to under load""" + + min_replicas: Required[int] + """The minimum number of replicas to maintain, even when there is no load""" diff --git a/src/together/types/endpoint_update_response.py b/src/together/types/endpoint_update_response.py new file mode 100644 index 00000000..24cb6487 --- /dev/null +++ b/src/together/types/endpoint_update_response.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from datetime import datetime +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["EndpointUpdateResponse", "Autoscaling"] + + +class Autoscaling(BaseModel): + max_replicas: int + """The maximum number of replicas to scale up to under load""" + + min_replicas: int + """The minimum number of replicas to maintain, even when there is no load""" + + +class EndpointUpdateResponse(BaseModel): + id: str + """Unique identifier for the endpoint""" + + autoscaling: Autoscaling + """Configuration for automatic scaling of the endpoint""" + + created_at: datetime + """Timestamp when the endpoint was created""" + + display_name: str + """Human-readable name for the endpoint""" + + hardware: str + """The hardware configuration used for this endpoint""" + + model: str + """The model deployed on this endpoint""" + + name: str + """System name for the endpoint""" + + object: Literal["endpoint"] + """The type of object""" + + owner: str + """The owner of this endpoint""" + + state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"] + """Current state of the endpoint""" + + type: Literal["dedicated"] + """The type of endpoint""" diff --git a/src/together/types/execute_response.py b/src/together/types/execute_response.py new file mode 100644 index 00000000..9234b72c --- /dev/null +++ b/src/together/types/execute_response.py @@ -0,0 +1,105 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from pydantic import Field as FieldInfo + +from .._utils import PropertyInfo +from .._models import BaseModel + +__all__ = [ + "ExecuteResponse", + "SuccessfulExecution", + "SuccessfulExecutionData", + "SuccessfulExecutionDataOutput", + "SuccessfulExecutionDataOutputStreamOutput", + "SuccessfulExecutionDataOutputError", + "SuccessfulExecutionDataOutputDisplayorExecuteOutput", + "SuccessfulExecutionDataOutputDisplayorExecuteOutputData", + "FailedExecution", +] + + +class SuccessfulExecutionDataOutputStreamOutput(BaseModel): + data: str + + type: Literal["stdout", "stderr"] + + +class SuccessfulExecutionDataOutputError(BaseModel): + data: str + + type: Literal["error"] + + +class SuccessfulExecutionDataOutputDisplayorExecuteOutputData(BaseModel): + application_geo_json: Optional[Dict[str, object]] = FieldInfo(alias="application/geo+json", default=None) + + application_javascript: Optional[str] = FieldInfo(alias="application/javascript", default=None) + + application_json: Optional[Dict[str, object]] = FieldInfo(alias="application/json", default=None) + + application_pdf: Optional[str] = FieldInfo(alias="application/pdf", default=None) + + application_vnd_vega_v5_json: Optional[Dict[str, object]] = FieldInfo( + alias="application/vnd.vega.v5+json", default=None + ) + + application_vnd_vegalite_v4_json: Optional[Dict[str, object]] = FieldInfo( + alias="application/vnd.vegalite.v4+json", default=None + ) + + image_gif: Optional[str] = FieldInfo(alias="image/gif", default=None) + + image_jpeg: Optional[str] = FieldInfo(alias="image/jpeg", default=None) + + image_png: Optional[str] = FieldInfo(alias="image/png", default=None) + + image_svg_xml: Optional[str] = FieldInfo(alias="image/svg+xml", default=None) + + text_html: Optional[str] = FieldInfo(alias="text/html", default=None) + + text_latex: Optional[str] = FieldInfo(alias="text/latex", default=None) + + text_markdown: Optional[str] = FieldInfo(alias="text/markdown", default=None) + + text_plain: Optional[str] = FieldInfo(alias="text/plain", default=None) + + +class SuccessfulExecutionDataOutputDisplayorExecuteOutput(BaseModel): + data: SuccessfulExecutionDataOutputDisplayorExecuteOutputData + + type: Literal["display_data", "execute_result"] + + +SuccessfulExecutionDataOutput: TypeAlias = Annotated[ + Union[ + SuccessfulExecutionDataOutputStreamOutput, + SuccessfulExecutionDataOutputError, + SuccessfulExecutionDataOutputDisplayorExecuteOutput, + ], + PropertyInfo(discriminator="type"), +] + + +class SuccessfulExecutionData(BaseModel): + outputs: List[SuccessfulExecutionDataOutput] + + session_id: str + """Identifier of the current session. Used to make follow-up calls.""" + + +class SuccessfulExecution(BaseModel): + data: SuccessfulExecutionData + + errors: None = None + + +class FailedExecution(BaseModel): + data: None = None + + errors: List[Union[str, Dict[str, object]]] + + +ExecuteResponse: TypeAlias = Union[SuccessfulExecution, FailedExecution] diff --git a/src/together/types/fine_tune.py b/src/together/types/fine_tune.py index ca97fe66..bf873be0 100644 --- a/src/together/types/fine_tune.py +++ b/src/together/types/fine_tune.py @@ -12,6 +12,11 @@ "Event", "LrScheduler", "LrSchedulerLrSchedulerArgs", + "LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs", + "LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs", + "TrainingMethod", + "TrainingMethodTrainingMethodSft", + "TrainingMethodTrainingMethodDpo", "TrainingType", "TrainingTypeFullTrainingType", "TrainingTypeLoRaTrainingType", @@ -74,17 +79,43 @@ class Event(BaseModel): level: Optional[Literal["info", "warning", "error", "legacy_info", "legacy_iwarning", "legacy_ierror"]] = None -class LrSchedulerLrSchedulerArgs(BaseModel): +class LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs(BaseModel): min_lr_ratio: Optional[float] = None """The ratio of the final learning rate to the peak learning rate""" +class LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs(BaseModel): + min_lr_ratio: Optional[float] = None + """The ratio of the final learning rate to the peak learning rate""" + + num_cycles: Optional[float] = None + """Number or fraction of cycles for the cosine learning rate scheduler""" + + +LrSchedulerLrSchedulerArgs: TypeAlias = Union[ + LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs, LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs +] + + class LrScheduler(BaseModel): - lr_scheduler_type: str + lr_scheduler_type: Literal["linear", "cosine"] lr_scheduler_args: Optional[LrSchedulerLrSchedulerArgs] = None +class TrainingMethodTrainingMethodSft(BaseModel): + method: Literal["sft"] + + +class TrainingMethodTrainingMethodDpo(BaseModel): + method: Literal["dpo"] + + dpo_beta: Optional[float] = None + + +TrainingMethod: TypeAlias = Union[TrainingMethodTrainingMethodSft, TrainingMethodTrainingMethodDpo] + + class TrainingTypeFullTrainingType(BaseModel): type: Literal["Full"] @@ -119,7 +150,7 @@ class FineTune(BaseModel): "completed", ] - batch_size: Optional[int] = None + batch_size: Union[int, Literal["max"], None] = None created_at: Optional[str] = None @@ -129,6 +160,8 @@ class FineTune(BaseModel): events: Optional[List[Event]] = None + from_checkpoint: Optional[str] = None + job_id: Optional[str] = None learning_rate: Optional[float] = None @@ -161,6 +194,8 @@ class FineTune(BaseModel): training_file: Optional[str] = None + training_method: Optional[TrainingMethod] = None + training_type: Optional[TrainingType] = None trainingfile_numlines: Optional[int] = None diff --git a/src/together/types/fine_tune_create_params.py b/src/together/types/fine_tune_create_params.py index 1ace4f8d..3291e5a5 100644 --- a/src/together/types/fine_tune_create_params.py +++ b/src/together/types/fine_tune_create_params.py @@ -9,6 +9,11 @@ "FineTuneCreateParams", "LrScheduler", "LrSchedulerLrSchedulerArgs", + "LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs", + "LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs", + "TrainingMethod", + "TrainingMethodTrainingMethodSft", + "TrainingMethodTrainingMethodDpo", "TrainingType", "TrainingTypeFullTrainingType", "TrainingTypeLoRaTrainingType", @@ -22,10 +27,19 @@ class FineTuneCreateParams(TypedDict, total=False): training_file: Required[str] """File-ID of a training file uploaded to the Together API""" - batch_size: int + batch_size: Union[int, Literal["max"]] """ Number of training examples processed together (larger batches use more memory - but may train faster) + but may train faster). Defaults to "max". We use training optimizations like + packing, so the effective batch size may be different than the value you set. + """ + + from_checkpoint: str + """The checkpoint identifier to continue training from a previous fine-tuning job. + + Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the + final checkpoint will be used. """ learning_rate: float @@ -35,6 +49,10 @@ class FineTuneCreateParams(TypedDict, total=False): """ lr_scheduler: LrScheduler + """The learning rate scheduler to use. + + It specifies how the learning rate is adjusted during training. + """ max_grad_norm: float """Max gradient norm to be used for gradient clipping. Set to 0 to disable.""" @@ -60,6 +78,12 @@ class FineTuneCreateParams(TypedDict, total=False): instruction data. """ + training_method: TrainingMethod + """The training method to use. + + 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. + """ + training_type: TrainingType validation_file: str @@ -87,20 +111,46 @@ class FineTuneCreateParams(TypedDict, total=False): """ weight_decay: float - """Weight decay""" + """Weight decay. Regularization parameter for the optimizer.""" + + +class LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs(TypedDict, total=False): + min_lr_ratio: float + """The ratio of the final learning rate to the peak learning rate""" -class LrSchedulerLrSchedulerArgs(TypedDict, total=False): +class LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs(TypedDict, total=False): min_lr_ratio: float """The ratio of the final learning rate to the peak learning rate""" + num_cycles: float + """Number or fraction of cycles for the cosine learning rate scheduler""" + + +LrSchedulerLrSchedulerArgs: TypeAlias = Union[ + LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs, LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs +] + class LrScheduler(TypedDict, total=False): - lr_scheduler_type: Required[str] + lr_scheduler_type: Required[Literal["linear", "cosine"]] lr_scheduler_args: LrSchedulerLrSchedulerArgs +class TrainingMethodTrainingMethodSft(TypedDict, total=False): + method: Required[Literal["sft"]] + + +class TrainingMethodTrainingMethodDpo(TypedDict, total=False): + method: Required[Literal["dpo"]] + + dpo_beta: float + + +TrainingMethod: TypeAlias = Union[TrainingMethodTrainingMethodSft, TrainingMethodTrainingMethodDpo] + + class TrainingTypeFullTrainingType(TypedDict, total=False): type: Required[Literal["Full"]] diff --git a/src/together/types/fine_tune_download_response.py b/src/together/types/fine_tune_download_response.py index 055c235e..a5f5953b 100644 --- a/src/together/types/fine_tune_download_response.py +++ b/src/together/types/fine_tune_download_response.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins from typing import Optional +from typing_extensions import Literal from .._models import BaseModel @@ -15,6 +15,6 @@ class FineTuneDownloadResponse(BaseModel): filename: Optional[str] = None - object: Optional[builtins.object] = None + object: Optional[Literal["local"]] = None size: Optional[int] = None diff --git a/src/together/types/hardware_list_params.py b/src/together/types/hardware_list_params.py new file mode 100644 index 00000000..6506655e --- /dev/null +++ b/src/together/types/hardware_list_params.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["HardwareListParams"] + + +class HardwareListParams(TypedDict, total=False): + model: str + """Filter hardware configurations by model compatibility. + + When provided, the response includes availability status for each compatible + configuration. + """ diff --git a/src/together/types/hardware_list_response.py b/src/together/types/hardware_list_response.py new file mode 100644 index 00000000..43481726 --- /dev/null +++ b/src/together/types/hardware_list_response.py @@ -0,0 +1,58 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from datetime import datetime +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["HardwareListResponse", "Data", "DataPricing", "DataSpecs", "DataAvailability"] + + +class DataPricing(BaseModel): + cents_per_minute: float + """Cost per minute of endpoint uptime in cents""" + + +class DataSpecs(BaseModel): + gpu_count: int + """Number of GPUs in this configuration""" + + gpu_link: str + """The GPU interconnect technology""" + + gpu_memory: float + """Amount of GPU memory in GB""" + + gpu_type: str + """The type/model of GPU""" + + +class DataAvailability(BaseModel): + status: Literal["available", "unavailable", "insufficient"] + """The availability status of the hardware configuration""" + + +class Data(BaseModel): + id: str + """Unique identifier for the hardware configuration""" + + object: Literal["hardware"] + + pricing: DataPricing + """Pricing details for using an endpoint""" + + specs: DataSpecs + """Detailed specifications of a hardware configuration""" + + updated_at: datetime + """Timestamp of when the hardware status was last updated""" + + availability: Optional[DataAvailability] = None + """Indicates the current availability status of a hardware configuration""" + + +class HardwareListResponse(BaseModel): + data: List[Data] + + object: Literal["list"] diff --git a/src/together/types/job_list_response.py b/src/together/types/job_list_response.py new file mode 100644 index 00000000..11281d23 --- /dev/null +++ b/src/together/types/job_list_response.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from datetime import datetime +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["JobListResponse", "Data", "DataArgs", "DataStatusUpdate"] + + +class DataArgs(BaseModel): + description: Optional[str] = None + + api_model_name: Optional[str] = FieldInfo(alias="modelName", default=None) + + api_model_source: Optional[str] = FieldInfo(alias="modelSource", default=None) + + +class DataStatusUpdate(BaseModel): + message: str + + status: str + + timestamp: datetime + + +class Data(BaseModel): + args: DataArgs + + created_at: datetime + + job_id: str + + status: Literal["Queued", "Running", "Complete", "Failed"] + + status_updates: List[DataStatusUpdate] + + type: str + + updated_at: datetime + + +class JobListResponse(BaseModel): + data: List[Data] diff --git a/src/together/types/job_retrieve_response.py b/src/together/types/job_retrieve_response.py new file mode 100644 index 00000000..ded83144 --- /dev/null +++ b/src/together/types/job_retrieve_response.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from datetime import datetime +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["JobRetrieveResponse", "Args", "StatusUpdate"] + + +class Args(BaseModel): + description: Optional[str] = None + + api_model_name: Optional[str] = FieldInfo(alias="modelName", default=None) + + api_model_source: Optional[str] = FieldInfo(alias="modelSource", default=None) + + +class StatusUpdate(BaseModel): + message: str + + status: str + + timestamp: datetime + + +class JobRetrieveResponse(BaseModel): + args: Args + + created_at: datetime + + job_id: str + + status: Literal["Queued", "Running", "Complete", "Failed"] + + status_updates: List[StatusUpdate] + + type: str + + updated_at: datetime diff --git a/src/together/types/model_upload_params.py b/src/together/types/model_upload_params.py new file mode 100644 index 00000000..9a159405 --- /dev/null +++ b/src/together/types/model_upload_params.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModelUploadParams"] + + +class ModelUploadParams(TypedDict, total=False): + model_name: Required[str] + """The name to give to your uploaded model""" + + model_source: Required[str] + """The source location of the model (Hugging Face repo or S3 path)""" + + base_model: str + """ + The base model to use for an adapter if setting it to run against a serverless + pool. Only used for model_type `adapter`. + """ + + description: str + """A description of your model""" + + hf_token: str + """Hugging Face token (if uploading from Hugging Face)""" + + lora_model: str + """ + The lora pool to use for an adapter if setting it to run against, say, a + dedicated pool. Only used for model_type `adapter`. + """ + + model_type: Literal["model", "adapter"] + """Whether the model is a full model or an adapter""" diff --git a/src/together/types/model_upload_response.py b/src/together/types/model_upload_response.py new file mode 100644 index 00000000..9b8d9237 --- /dev/null +++ b/src/together/types/model_upload_response.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["ModelUploadResponse", "Data"] + + +class Data(BaseModel): + job_id: str + + api_model_id: str = FieldInfo(alias="model_id") + + api_model_name: str = FieldInfo(alias="model_name") + + api_model_source: str = FieldInfo(alias="model_source") + + +class ModelUploadResponse(BaseModel): + data: Data + + message: str diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 88553cec..207023a6 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -26,7 +26,7 @@ def test_method_create_overload_1(self, client: Together) -> None: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -39,7 +39,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", context_length_exceeded_behavior="truncate", echo=True, frequency_penalty=0, @@ -55,7 +55,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non presence_penalty=0, repetition_penalty=0, response_format={ - "schema": {"foo": "string"}, + "schema": {"foo": "bar"}, "type": "json", }, safety_model="safety_model_name", @@ -88,7 +88,7 @@ def test_raw_response_create_overload_1(self, client: Together) -> None: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) assert response.is_closed is True @@ -105,7 +105,7 @@ def test_streaming_response_create_overload_1(self, client: Together) -> None: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -124,7 +124,7 @@ def test_method_create_overload_2(self, client: Together) -> None: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, ) completion_stream.response.close() @@ -138,7 +138,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, context_length_exceeded_behavior="truncate", echo=True, @@ -155,7 +155,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non presence_penalty=0, repetition_penalty=0, response_format={ - "schema": {"foo": "string"}, + "schema": {"foo": "bar"}, "type": "json", }, safety_model="safety_model_name", @@ -187,7 +187,7 @@ def test_raw_response_create_overload_2(self, client: Together) -> None: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, ) @@ -204,7 +204,7 @@ def test_streaming_response_create_overload_2(self, client: Together) -> None: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, ) as response: assert not response.is_closed @@ -228,7 +228,7 @@ async def test_method_create_overload_1(self, async_client: AsyncTogether) -> No "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -241,7 +241,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", context_length_exceeded_behavior="truncate", echo=True, frequency_penalty=0, @@ -257,7 +257,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn presence_penalty=0, repetition_penalty=0, response_format={ - "schema": {"foo": "string"}, + "schema": {"foo": "bar"}, "type": "json", }, safety_model="safety_model_name", @@ -290,7 +290,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncTogether) "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) assert response.is_closed is True @@ -307,7 +307,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncTog "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -326,7 +326,7 @@ async def test_method_create_overload_2(self, async_client: AsyncTogether) -> No "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, ) await completion_stream.response.aclose() @@ -340,7 +340,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, context_length_exceeded_behavior="truncate", echo=True, @@ -357,7 +357,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn presence_penalty=0, repetition_penalty=0, response_format={ - "schema": {"foo": "string"}, + "schema": {"foo": "bar"}, "type": "json", }, safety_model="safety_model_name", @@ -389,7 +389,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncTogether) "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, ) @@ -406,7 +406,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncTog "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", stream=True, ) as response: assert not response.is_closed diff --git a/tests/api_resources/code_interpreter/__init__.py b/tests/api_resources/code_interpreter/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/code_interpreter/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/code_interpreter/test_sessions.py b/tests/api_resources/code_interpreter/test_sessions.py new file mode 100644 index 00000000..e53d7a4a --- /dev/null +++ b/tests/api_resources/code_interpreter/test_sessions.py @@ -0,0 +1,90 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from together import Together, AsyncTogether +from tests.utils import assert_matches_type +from together.types.code_interpreter import SessionListResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestSessions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + def test_method_list(self, client: Together) -> None: + session = client.code_interpreter.sessions.list() + assert_matches_type(SessionListResponse, session, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + def test_raw_response_list(self, client: Together) -> None: + response = client.code_interpreter.sessions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionListResponse, session, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + def test_streaming_response_list(self, client: Together) -> None: + with client.code_interpreter.sessions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = response.parse() + assert_matches_type(SessionListResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncSessions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + async def test_method_list(self, async_client: AsyncTogether) -> None: + session = await async_client.code_interpreter.sessions.list() + assert_matches_type(SessionListResponse, session, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + async def test_raw_response_list(self, async_client: AsyncTogether) -> None: + response = await async_client.code_interpreter.sessions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = await response.parse() + assert_matches_type(SessionListResponse, session, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + async def test_streaming_response_list(self, async_client: AsyncTogether) -> None: + async with async_client.code_interpreter.sessions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = await response.parse() + assert_matches_type(SessionListResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_code_interpreter.py b/tests/api_resources/test_code_interpreter.py new file mode 100644 index 00000000..17c1928c --- /dev/null +++ b/tests/api_resources/test_code_interpreter.py @@ -0,0 +1,146 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from together import Together, AsyncTogether +from tests.utils import assert_matches_type +from together.types import ExecuteResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestCodeInterpreter: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + def test_method_execute(self, client: Together) -> None: + code_interpreter = client.code_interpreter.execute( + code="print('Hello, world!')", + language="python", + ) + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + def test_method_execute_with_all_params(self, client: Together) -> None: + code_interpreter = client.code_interpreter.execute( + code="print('Hello, world!')", + language="python", + files=[ + { + "content": "content", + "encoding": "string", + "name": "name", + } + ], + session_id="ses_abcDEF123", + ) + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + def test_raw_response_execute(self, client: Together) -> None: + response = client.code_interpreter.with_raw_response.execute( + code="print('Hello, world!')", + language="python", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + code_interpreter = response.parse() + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + def test_streaming_response_execute(self, client: Together) -> None: + with client.code_interpreter.with_streaming_response.execute( + code="print('Hello, world!')", + language="python", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + code_interpreter = response.parse() + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncCodeInterpreter: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + async def test_method_execute(self, async_client: AsyncTogether) -> None: + code_interpreter = await async_client.code_interpreter.execute( + code="print('Hello, world!')", + language="python", + ) + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + async def test_method_execute_with_all_params(self, async_client: AsyncTogether) -> None: + code_interpreter = await async_client.code_interpreter.execute( + code="print('Hello, world!')", + language="python", + files=[ + { + "content": "content", + "encoding": "string", + "name": "name", + } + ], + session_id="ses_abcDEF123", + ) + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + async def test_raw_response_execute(self, async_client: AsyncTogether) -> None: + response = await async_client.code_interpreter.with_raw_response.execute( + code="print('Hello, world!')", + language="python", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + code_interpreter = await response.parse() + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + @pytest.mark.skip( + reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url" + ) + @parametrize + async def test_streaming_response_execute(self, async_client: AsyncTogether) -> None: + async with async_client.code_interpreter.with_streaming_response.execute( + code="print('Hello, world!')", + language="python", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + code_interpreter = await response.parse() + assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py index a81e24c5..084ad480 100644 --- a/tests/api_resources/test_embeddings.py +++ b/tests/api_resources/test_embeddings.py @@ -21,7 +21,7 @@ class TestEmbeddings: def test_method_create(self, client: Together) -> None: embedding = client.embeddings.create( input="Our solar system orbits the Milky Way galaxy at about 515,000 mph", - model="WhereIsAI/UAE-Large-V1", + model="togethercomputer/m2-bert-80M-8k-retrieval", ) assert_matches_type(Embedding, embedding, path=["response"]) @@ -29,7 +29,7 @@ def test_method_create(self, client: Together) -> None: def test_raw_response_create(self, client: Together) -> None: response = client.embeddings.with_raw_response.create( input="Our solar system orbits the Milky Way galaxy at about 515,000 mph", - model="WhereIsAI/UAE-Large-V1", + model="togethercomputer/m2-bert-80M-8k-retrieval", ) assert response.is_closed is True @@ -41,7 +41,7 @@ def test_raw_response_create(self, client: Together) -> None: def test_streaming_response_create(self, client: Together) -> None: with client.embeddings.with_streaming_response.create( input="Our solar system orbits the Milky Way galaxy at about 515,000 mph", - model="WhereIsAI/UAE-Large-V1", + model="togethercomputer/m2-bert-80M-8k-retrieval", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -59,7 +59,7 @@ class TestAsyncEmbeddings: async def test_method_create(self, async_client: AsyncTogether) -> None: embedding = await async_client.embeddings.create( input="Our solar system orbits the Milky Way galaxy at about 515,000 mph", - model="WhereIsAI/UAE-Large-V1", + model="togethercomputer/m2-bert-80M-8k-retrieval", ) assert_matches_type(Embedding, embedding, path=["response"]) @@ -67,7 +67,7 @@ async def test_method_create(self, async_client: AsyncTogether) -> None: async def test_raw_response_create(self, async_client: AsyncTogether) -> None: response = await async_client.embeddings.with_raw_response.create( input="Our solar system orbits the Milky Way galaxy at about 515,000 mph", - model="WhereIsAI/UAE-Large-V1", + model="togethercomputer/m2-bert-80M-8k-retrieval", ) assert response.is_closed is True @@ -79,7 +79,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None: async def test_streaming_response_create(self, async_client: AsyncTogether) -> None: async with async_client.embeddings.with_streaming_response.create( input="Our solar system orbits the Milky Way galaxy at about 515,000 mph", - model="WhereIsAI/UAE-Large-V1", + model="togethercomputer/m2-bert-80M-8k-retrieval", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_endpoints.py b/tests/api_resources/test_endpoints.py new file mode 100644 index 00000000..59cbc6ab --- /dev/null +++ b/tests/api_resources/test_endpoints.py @@ -0,0 +1,473 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from together import Together, AsyncTogether +from tests.utils import assert_matches_type +from together.types import ( + EndpointListResponse, + EndpointCreateResponse, + EndpointUpdateResponse, + EndpointRetrieveResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestEndpoints: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: Together) -> None: + endpoint = client.endpoints.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + ) + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: Together) -> None: + endpoint = client.endpoints.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + disable_prompt_cache=True, + disable_speculative_decoding=True, + display_name="My Llama3 70b endpoint", + inactive_timeout=60, + state="STARTED", + ) + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: Together) -> None: + response = client.endpoints.with_raw_response.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = response.parse() + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: Together) -> None: + with client.endpoints.with_streaming_response.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = response.parse() + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: Together) -> None: + endpoint = client.endpoints.retrieve( + "endpointId", + ) + assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: Together) -> None: + response = client.endpoints.with_raw_response.retrieve( + "endpointId", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = response.parse() + assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: Together) -> None: + with client.endpoints.with_streaming_response.retrieve( + "endpointId", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = response.parse() + assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: Together) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"): + client.endpoints.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: Together) -> None: + endpoint = client.endpoints.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + ) + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + @parametrize + def test_method_update_with_all_params(self, client: Together) -> None: + endpoint = client.endpoints.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + display_name="My Llama3 70b endpoint", + inactive_timeout=60, + state="STARTED", + ) + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: Together) -> None: + response = client.endpoints.with_raw_response.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = response.parse() + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: Together) -> None: + with client.endpoints.with_streaming_response.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = response.parse() + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: Together) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"): + client.endpoints.with_raw_response.update( + endpoint_id="", + ) + + @parametrize + def test_method_list(self, client: Together) -> None: + endpoint = client.endpoints.list() + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: Together) -> None: + endpoint = client.endpoints.list( + type="dedicated", + ) + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: Together) -> None: + response = client.endpoints.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = response.parse() + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: Together) -> None: + with client.endpoints.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = response.parse() + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: Together) -> None: + endpoint = client.endpoints.delete( + "endpointId", + ) + assert endpoint is None + + @parametrize + def test_raw_response_delete(self, client: Together) -> None: + response = client.endpoints.with_raw_response.delete( + "endpointId", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = response.parse() + assert endpoint is None + + @parametrize + def test_streaming_response_delete(self, client: Together) -> None: + with client.endpoints.with_streaming_response.delete( + "endpointId", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = response.parse() + assert endpoint is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: Together) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"): + client.endpoints.with_raw_response.delete( + "", + ) + + +class TestAsyncEndpoints: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + ) + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + disable_prompt_cache=True, + disable_speculative_decoding=True, + display_name="My Llama3 70b endpoint", + inactive_timeout=60, + state="STARTED", + ) + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncTogether) -> None: + response = await async_client.endpoints.with_raw_response.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = await response.parse() + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncTogether) -> None: + async with async_client.endpoints.with_streaming_response.create( + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + hardware="1x_nvidia_a100_80gb_sxm", + model="meta-llama/Llama-3-8b-chat-hf", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = await response.parse() + assert_matches_type(EndpointCreateResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.retrieve( + "endpointId", + ) + assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncTogether) -> None: + response = await async_client.endpoints.with_raw_response.retrieve( + "endpointId", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = await response.parse() + assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncTogether) -> None: + async with async_client.endpoints.with_streaming_response.retrieve( + "endpointId", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = await response.parse() + assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncTogether) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"): + await async_client.endpoints.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + ) + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + autoscaling={ + "max_replicas": 5, + "min_replicas": 2, + }, + display_name="My Llama3 70b endpoint", + inactive_timeout=60, + state="STARTED", + ) + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncTogether) -> None: + response = await async_client.endpoints.with_raw_response.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = await response.parse() + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncTogether) -> None: + async with async_client.endpoints.with_streaming_response.update( + endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = await response.parse() + assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncTogether) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"): + await async_client.endpoints.with_raw_response.update( + endpoint_id="", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.list() + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.list( + type="dedicated", + ) + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncTogether) -> None: + response = await async_client.endpoints.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = await response.parse() + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncTogether) -> None: + async with async_client.endpoints.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = await response.parse() + assert_matches_type(EndpointListResponse, endpoint, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncTogether) -> None: + endpoint = await async_client.endpoints.delete( + "endpointId", + ) + assert endpoint is None + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncTogether) -> None: + response = await async_client.endpoints.with_raw_response.delete( + "endpointId", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + endpoint = await response.parse() + assert endpoint is None + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncTogether) -> None: + async with async_client.endpoints.with_streaming_response.delete( + "endpointId", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + endpoint = await response.parse() + assert endpoint is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncTogether) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"): + await async_client.endpoints.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/test_fine_tune.py b/tests/api_resources/test_fine_tune.py index 8a4eda52..4545089a 100644 --- a/tests/api_resources/test_fine_tune.py +++ b/tests/api_resources/test_fine_tune.py @@ -22,6 +22,7 @@ class TestFineTune: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize def test_method_create(self, client: Together) -> None: fine_tune = client.fine_tune.create( @@ -30,15 +31,17 @@ def test_method_create(self, client: Together) -> None: ) assert_matches_type(FineTune, fine_tune, path=["response"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize def test_method_create_with_all_params(self, client: Together) -> None: fine_tune = client.fine_tune.create( model="model", training_file="training_file", batch_size=0, + from_checkpoint="from_checkpoint", learning_rate=0, lr_scheduler={ - "lr_scheduler_type": "lr_scheduler_type", + "lr_scheduler_type": "linear", "lr_scheduler_args": {"min_lr_ratio": 0}, }, max_grad_norm=0, @@ -47,6 +50,7 @@ def test_method_create_with_all_params(self, client: Together) -> None: n_evals=0, suffix="suffix", train_on_inputs=True, + training_method={"method": "sft"}, training_type={"type": "Full"}, validation_file="validation_file", wandb_api_key="wandb_api_key", @@ -58,6 +62,7 @@ def test_method_create_with_all_params(self, client: Together) -> None: ) assert_matches_type(FineTune, fine_tune, path=["response"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize def test_raw_response_create(self, client: Together) -> None: response = client.fine_tune.with_raw_response.create( @@ -70,6 +75,7 @@ def test_raw_response_create(self, client: Together) -> None: fine_tune = response.parse() assert_matches_type(FineTune, fine_tune, path=["response"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize def test_streaming_response_create(self, client: Together) -> None: with client.fine_tune.with_streaming_response.create( @@ -268,6 +274,7 @@ def test_path_params_list_events(self, client: Together) -> None: class TestAsyncFineTune: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize async def test_method_create(self, async_client: AsyncTogether) -> None: fine_tune = await async_client.fine_tune.create( @@ -276,15 +283,17 @@ async def test_method_create(self, async_client: AsyncTogether) -> None: ) assert_matches_type(FineTune, fine_tune, path=["response"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None: fine_tune = await async_client.fine_tune.create( model="model", training_file="training_file", batch_size=0, + from_checkpoint="from_checkpoint", learning_rate=0, lr_scheduler={ - "lr_scheduler_type": "lr_scheduler_type", + "lr_scheduler_type": "linear", "lr_scheduler_args": {"min_lr_ratio": 0}, }, max_grad_norm=0, @@ -293,6 +302,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether) n_evals=0, suffix="suffix", train_on_inputs=True, + training_method={"method": "sft"}, training_type={"type": "Full"}, validation_file="validation_file", wandb_api_key="wandb_api_key", @@ -304,6 +314,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether) ) assert_matches_type(FineTune, fine_tune, path=["response"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize async def test_raw_response_create(self, async_client: AsyncTogether) -> None: response = await async_client.fine_tune.with_raw_response.create( @@ -316,6 +327,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None: fine_tune = await response.parse() assert_matches_type(FineTune, fine_tune, path=["response"]) + @pytest.mark.skip(reason="invalid oneOf in required props") @parametrize async def test_streaming_response_create(self, async_client: AsyncTogether) -> None: async with async_client.fine_tune.with_streaming_response.create( diff --git a/tests/api_resources/test_hardware.py b/tests/api_resources/test_hardware.py new file mode 100644 index 00000000..aafe18f0 --- /dev/null +++ b/tests/api_resources/test_hardware.py @@ -0,0 +1,86 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from together import Together, AsyncTogether +from tests.utils import assert_matches_type +from together.types import HardwareListResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestHardware: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: Together) -> None: + hardware = client.hardware.list() + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: Together) -> None: + hardware = client.hardware.list( + model="model", + ) + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: Together) -> None: + response = client.hardware.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + hardware = response.parse() + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: Together) -> None: + with client.hardware.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + hardware = response.parse() + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncHardware: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_list(self, async_client: AsyncTogether) -> None: + hardware = await async_client.hardware.list() + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncTogether) -> None: + hardware = await async_client.hardware.list( + model="model", + ) + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncTogether) -> None: + response = await async_client.hardware.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + hardware = await response.parse() + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncTogether) -> None: + async with async_client.hardware.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + hardware = await response.parse() + assert_matches_type(HardwareListResponse, hardware, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py index 2d1dda01..0e84fb5f 100644 --- a/tests/api_resources/test_images.py +++ b/tests/api_resources/test_images.py @@ -21,7 +21,7 @@ class TestImages: @parametrize def test_method_create(self, client: Together) -> None: image = client.images.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", ) assert_matches_type(ImageFile, image, path=["response"]) @@ -30,7 +30,7 @@ def test_method_create(self, client: Together) -> None: @parametrize def test_method_create_with_all_params(self, client: Together) -> None: image = client.images.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", guidance=0, height=0, @@ -55,7 +55,7 @@ def test_method_create_with_all_params(self, client: Together) -> None: @parametrize def test_raw_response_create(self, client: Together) -> None: response = client.images.with_raw_response.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", ) @@ -68,7 +68,7 @@ def test_raw_response_create(self, client: Together) -> None: @parametrize def test_streaming_response_create(self, client: Together) -> None: with client.images.with_streaming_response.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", ) as response: assert not response.is_closed @@ -87,7 +87,7 @@ class TestAsyncImages: @parametrize async def test_method_create(self, async_client: AsyncTogether) -> None: image = await async_client.images.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", ) assert_matches_type(ImageFile, image, path=["response"]) @@ -96,7 +96,7 @@ async def test_method_create(self, async_client: AsyncTogether) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None: image = await async_client.images.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", guidance=0, height=0, @@ -121,7 +121,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether) @parametrize async def test_raw_response_create(self, async_client: AsyncTogether) -> None: response = await async_client.images.with_raw_response.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", ) @@ -134,7 +134,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncTogether) -> None: async with async_client.images.with_streaming_response.create( - model="black-forest-labs/FLUX.1-schnell-Free", + model="black-forest-labs/FLUX.1-schnell", prompt="cat floating in space, cinematic", ) as response: assert not response.is_closed diff --git a/tests/api_resources/test_jobs.py b/tests/api_resources/test_jobs.py new file mode 100644 index 00000000..110600d7 --- /dev/null +++ b/tests/api_resources/test_jobs.py @@ -0,0 +1,148 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from together import Together, AsyncTogether +from tests.utils import assert_matches_type +from together.types import JobListResponse, JobRetrieveResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestJobs: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: Together) -> None: + job = client.jobs.retrieve( + "jobId", + ) + assert_matches_type(JobRetrieveResponse, job, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: Together) -> None: + response = client.jobs.with_raw_response.retrieve( + "jobId", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(JobRetrieveResponse, job, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: Together) -> None: + with client.jobs.with_streaming_response.retrieve( + "jobId", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(JobRetrieveResponse, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: Together) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): + client.jobs.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: Together) -> None: + job = client.jobs.list() + assert_matches_type(JobListResponse, job, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: Together) -> None: + response = client.jobs.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = response.parse() + assert_matches_type(JobListResponse, job, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: Together) -> None: + with client.jobs.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = response.parse() + assert_matches_type(JobListResponse, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncJobs: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncTogether) -> None: + job = await async_client.jobs.retrieve( + "jobId", + ) + assert_matches_type(JobRetrieveResponse, job, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncTogether) -> None: + response = await async_client.jobs.with_raw_response.retrieve( + "jobId", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = await response.parse() + assert_matches_type(JobRetrieveResponse, job, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncTogether) -> None: + async with async_client.jobs.with_streaming_response.retrieve( + "jobId", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(JobRetrieveResponse, job, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncTogether) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): + await async_client.jobs.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncTogether) -> None: + job = await async_client.jobs.list() + assert_matches_type(JobListResponse, job, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncTogether) -> None: + response = await async_client.jobs.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + job = await response.parse() + assert_matches_type(JobListResponse, job, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncTogether) -> None: + async with async_client.jobs.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + job = await response.parse() + assert_matches_type(JobListResponse, job, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index 4a0d63e8..fbf910a0 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -9,7 +9,7 @@ from together import Together, AsyncTogether from tests.utils import assert_matches_type -from together.types import ModelListResponse +from together.types import ModelListResponse, ModelUploadResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -42,6 +42,53 @@ def test_streaming_response_list(self, client: Together) -> None: assert cast(Any, response.is_closed) is True + @parametrize + def test_method_upload(self, client: Together) -> None: + model = client.models.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + ) + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + @parametrize + def test_method_upload_with_all_params(self, client: Together) -> None: + model = client.models.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + base_model="Qwen/Qwen2.5-72B-Instruct", + description="Finetuned Qwen2.5-72B-Instruct by Unsloth", + hf_token="hf_examplehuggingfacetoken", + lora_model="my_username/Qwen2.5-72B-Instruct-lora", + model_type="model", + ) + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + @parametrize + def test_raw_response_upload(self, client: Together) -> None: + response = client.models.with_raw_response.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = response.parse() + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + @parametrize + def test_streaming_response_upload(self, client: Together) -> None: + with client.models.with_streaming_response.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = response.parse() + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + assert cast(Any, response.is_closed) is True + class TestAsyncModels: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @@ -70,3 +117,50 @@ async def test_streaming_response_list(self, async_client: AsyncTogether) -> Non assert_matches_type(ModelListResponse, model, path=["response"]) assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_upload(self, async_client: AsyncTogether) -> None: + model = await async_client.models.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + ) + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + @parametrize + async def test_method_upload_with_all_params(self, async_client: AsyncTogether) -> None: + model = await async_client.models.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + base_model="Qwen/Qwen2.5-72B-Instruct", + description="Finetuned Qwen2.5-72B-Instruct by Unsloth", + hf_token="hf_examplehuggingfacetoken", + lora_model="my_username/Qwen2.5-72B-Instruct-lora", + model_type="model", + ) + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + @parametrize + async def test_raw_response_upload(self, async_client: AsyncTogether) -> None: + response = await async_client.models.with_raw_response.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + model = await response.parse() + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + @parametrize + async def test_streaming_response_upload(self, async_client: AsyncTogether) -> None: + async with async_client.models.with_streaming_response.upload( + model_name="Qwen2.5-72B-Instruct", + model_source="unsloth/Qwen2.5-72B-Instruct", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + model = await response.parse() + assert_matches_type(ModelUploadResponse, model, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/conftest.py b/tests/conftest.py index 2262b6e7..b7e86792 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ from together import Together, AsyncTogether if TYPE_CHECKING: - from _pytest.fixtures import FixtureRequest + from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage] pytest.register_assert_rewrite("tests.utils") diff --git a/tests/test_client.py b/tests/test_client.py index 5daf9572..b8c41bee 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -814,7 +814,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) assert response.retries_taken == failures_before_success @@ -846,7 +846,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", extra_headers={"x-stainless-retry-count": Omit()}, ) @@ -878,7 +878,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", extra_headers={"x-stainless-retry-count": "42"}, ) @@ -1657,7 +1657,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", ) assert response.retries_taken == failures_before_success @@ -1690,7 +1690,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", extra_headers={"x-stainless-retry-count": Omit()}, ) @@ -1723,7 +1723,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: "role": "system", } ], - model="Qwen/Qwen2.5-72B-Instruct-Turbo", + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", extra_headers={"x-stainless-retry-count": "42"}, ) @@ -1741,7 +1741,7 @@ def test_get_platform(self) -> None: import threading from together._utils import asyncify - from together._base_client import get_platform + from together._base_client import get_platform async def test_main() -> None: result = await asyncify(get_platform)() diff --git a/tests/test_models.py b/tests/test_models.py index 6fbb96a3..da728846 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -492,12 +492,15 @@ class Model(BaseModel): resource_id: Optional[str] = None m = Model.construct() + assert m.resource_id is None assert "resource_id" not in m.model_fields_set m = Model.construct(resource_id=None) + assert m.resource_id is None assert "resource_id" in m.model_fields_set m = Model.construct(resource_id="foo") + assert m.resource_id == "foo" assert "resource_id" in m.model_fields_set @@ -832,7 +835,7 @@ class B(BaseModel): @pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1") def test_type_alias_type() -> None: - Alias = TypeAliasType("Alias", str) + Alias = TypeAliasType("Alias", str) # pyright: ignore class Model(BaseModel): alias: Alias @@ -854,3 +857,35 @@ class Model(BaseModel): m = construct_type(value={"cls": "foo"}, type_=Model) assert isinstance(m, Model) assert isinstance(m.cls, str) + + +def test_discriminated_union_case() -> None: + class A(BaseModel): + type: Literal["a"] + + data: bool + + class B(BaseModel): + type: Literal["b"] + + data: List[Union[A, object]] + + class ModelA(BaseModel): + type: Literal["modelA"] + + data: int + + class ModelB(BaseModel): + type: Literal["modelB"] + + required: str + + data: Union[A, B] + + # when constructing ModelA | ModelB, value data doesn't match ModelB exactly - missing `required` + m = construct_type( + value={"type": "modelB", "data": {"type": "a", "data": True}}, + type_=cast(Any, Annotated[Union[ModelA, ModelB], PropertyInfo(discriminator="type")]), + ) + + assert isinstance(m, ModelB) diff --git a/tests/test_transform.py b/tests/test_transform.py index 3dda603b..2a34b4f7 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -8,7 +8,7 @@ import pytest -from together._types import Base64FileInput +from together._types import NOT_GIVEN, Base64FileInput from together._utils import ( PropertyInfo, transform as _transform, @@ -432,3 +432,22 @@ async def test_base64_file_input(use_async: bool) -> None: assert await transform({"foo": io.BytesIO(b"Hello, world!")}, TypedDictBase64Input, use_async) == { "foo": "SGVsbG8sIHdvcmxkIQ==" } # type: ignore[comparison-overlap] + + +@parametrize +@pytest.mark.asyncio +async def test_transform_skipping(use_async: bool) -> None: + # lists of ints are left as-is + data = [1, 2, 3] + assert await transform(data, List[int], use_async) is data + + # iterables of ints are converted to a list + data = iter([1, 2, 3]) + assert await transform(data, Iterable[int], use_async) == [1, 2, 3] + + +@parametrize +@pytest.mark.asyncio +async def test_strips_notgiven(use_async: bool) -> None: + assert await transform({"foo_bar": "bar"}, Foo1, use_async) == {"fooBar": "bar"} + assert await transform({"foo_bar": NOT_GIVEN}, Foo1, use_async) == {} diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py index 75a517dd..7c25ecc7 100644 --- a/tests/test_utils/test_proxy.py +++ b/tests/test_utils/test_proxy.py @@ -21,3 +21,14 @@ def test_recursive_proxy() -> None: assert dir(proxy) == [] assert type(proxy).__name__ == "RecursiveLazyProxy" assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy" + + +def test_isinstance_does_not_error() -> None: + class AlwaysErrorProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + raise RuntimeError("Mocking missing dependency") + + proxy = AlwaysErrorProxy() + assert not isinstance(proxy, dict) + assert isinstance(proxy, LazyProxy)