diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 55d20255..ff261bad 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
USER vscode
-RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash
+RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.44.0" RYE_INSTALL_OPTION="--yes" bash
ENV PATH=/home/vscode/.rye/shims:$PATH
RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c8a8a4f7..a7180a90 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,18 +1,18 @@
name: CI
on:
push:
- branches:
- - main
- pull_request:
- branches:
- - main
- - next
+ branches-ignore:
+ - 'generated'
+ - 'codegen/**'
+ - 'integrated/**'
+ - 'stl-preview-head/**'
+ - 'stl-preview-base/**'
jobs:
lint:
+ timeout-minutes: 10
name: lint
- runs-on: ubuntu-latest
-
+ runs-on: ${{ github.repository == 'stainless-sdks/togetherai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
steps:
- uses: actions/checkout@v4
@@ -21,7 +21,7 @@ jobs:
curl -sSf https://rye.astral.sh/get | bash
echo "$HOME/.rye/shims" >> $GITHUB_PATH
env:
- RYE_VERSION: '0.35.0'
+ RYE_VERSION: '0.44.0'
RYE_INSTALL_OPTION: '--yes'
- name: Install dependencies
@@ -31,9 +31,9 @@ jobs:
run: ./scripts/lint
test:
+ timeout-minutes: 10
name: test
- runs-on: ubuntu-latest
-
+ runs-on: ${{ github.repository == 'stainless-sdks/togetherai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
steps:
- uses: actions/checkout@v4
@@ -42,7 +42,7 @@ jobs:
curl -sSf https://rye.astral.sh/get | bash
echo "$HOME/.rye/shims" >> $GITHUB_PATH
env:
- RYE_VERSION: '0.35.0'
+ RYE_VERSION: '0.44.0'
RYE_INSTALL_OPTION: '--yes'
- name: Bootstrap
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index fb499434..41ab0074 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -21,7 +21,7 @@ jobs:
curl -sSf https://rye.astral.sh/get | bash
echo "$HOME/.rye/shims" >> $GITHUB_PATH
env:
- RYE_VERSION: '0.35.0'
+ RYE_VERSION: '0.44.0'
RYE_INSTALL_OPTION: '--yes'
- name: Publish to PyPI
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index aaf968a1..b56c3d0b 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.3"
+ ".": "0.1.0-alpha.4"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index a38bcf77..03c953a6 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,4 @@
-configured_endpoints: 17
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-a11987df1895448b6ccbbc6d931e7db9a0dc3e6de7c6efb237ac9548342e616b.yml
+configured_endpoints: 28
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-8f50cb3e342f2fd67f1f2cfda195b3d78c0740344f55f37cf1c99c66a0f7c2c5.yml
+openapi_spec_hash: b9907745f73f337395ffd5cef1e8a2d5
+config_hash: a60b100624e80dc8d9144e7bc306f5ce
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d57805cf..ffdc48d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,70 @@
# Changelog
+## 0.1.0-alpha.4 (2025-05-13)
+
+Full Changelog: [v0.1.0-alpha.3...v0.1.0-alpha.4](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.3...v0.1.0-alpha.4)
+
+### Features
+
+* **api:** add tci resources ([023b3a0](https://github.com/togethercomputer/together-py/commit/023b3a00991729a0a06845ee7f64f760cf6f4325))
+* **api:** adds unspecified endpoints ([678f58a](https://github.com/togethercomputer/together-py/commit/678f58af8b2be9e65b667cb0b104a9be4b6667f4))
+* **api:** api update ([6d9609e](https://github.com/togethercomputer/together-py/commit/6d9609e279d228ba1aad926914d089904b858c01))
+* **api:** api update ([bb40eb9](https://github.com/togethercomputer/together-py/commit/bb40eb96cbf911f0f9772c98e261988ab1432383))
+* **api:** api update ([271887f](https://github.com/togethercomputer/together-py/commit/271887fe30d8f4b8f0405d16366e1f82868a1d0d))
+* **api:** api update ([2a7de06](https://github.com/togethercomputer/together-py/commit/2a7de06a3a1b5425a1dd553c32390df21b252e21))
+* **api:** api update ([#117](https://github.com/togethercomputer/together-py/issues/117)) ([dd8e841](https://github.com/togethercomputer/together-py/commit/dd8e841d1eaf40a9f143f63f7f4ced0f701b0fbd))
+* **api:** api update ([#120](https://github.com/togethercomputer/together-py/issues/120)) ([adf0e5b](https://github.com/togethercomputer/together-py/commit/adf0e5ba1cd266278cf4503b04cfcd847a97b0e4))
+* **api:** api update ([#121](https://github.com/togethercomputer/together-py/issues/121)) ([0ab0bc9](https://github.com/togethercomputer/together-py/commit/0ab0bc97ca4db4d2d64f3c2f9eeada9ffa37fc97))
+* **api:** api update ([#130](https://github.com/togethercomputer/together-py/issues/130)) ([4f1a7ea](https://github.com/togethercomputer/together-py/commit/4f1a7ea708c55466f4fa3f1698b505ffbfe2aea6))
+* **api:** api update ([#132](https://github.com/togethercomputer/together-py/issues/132)) ([7c8a194](https://github.com/togethercomputer/together-py/commit/7c8a194c4e1f484f8455adce6f56c840411f4946))
+* **api:** api update ([#135](https://github.com/togethercomputer/together-py/issues/135)) ([22a93e9](https://github.com/togethercomputer/together-py/commit/22a93e9c5c7a33c502f5a4c380c576c2a752d6a5))
+
+
+### Bug Fixes
+
+* **ci:** ensure pip is always available ([#127](https://github.com/togethercomputer/together-py/issues/127)) ([4da2bc0](https://github.com/togethercomputer/together-py/commit/4da2bc0bb7cc4516cf0d93032544fbb71025c118))
+* **ci:** remove publishing patch ([#128](https://github.com/togethercomputer/together-py/issues/128)) ([6bd4d6f](https://github.com/togethercomputer/together-py/commit/6bd4d6f8d8f8842f56cdbb56df0a4d5e5227dde4))
+* **client:** correct type to enum ([#129](https://github.com/togethercomputer/together-py/issues/129)) ([8a5fa0e](https://github.com/togethercomputer/together-py/commit/8a5fa0e2858e851756f022943ada948374bb017c))
+* **package:** support direct resource imports ([f59e7c3](https://github.com/togethercomputer/together-py/commit/f59e7c3b3bcc7c076bd8c71b2ab42f8a117e5519))
+* **perf:** optimize some hot paths ([f79734d](https://github.com/togethercomputer/together-py/commit/f79734d809a4a7c18eb8903190e6b4d90d299e45))
+* **perf:** skip traversing types for NotGiven values ([1103dd0](https://github.com/togethercomputer/together-py/commit/1103dd03e7f021deadd0b000b3bff9c5494442b6))
+* **pydantic v1:** more robust ModelField.annotation check ([d380238](https://github.com/togethercomputer/together-py/commit/d3802383e80ad8d3606a1e753c72a20864531332))
+* skip invalid fine-tune test ([#133](https://github.com/togethercomputer/together-py/issues/133)) ([2f41046](https://github.com/togethercomputer/together-py/commit/2f4104625264947305cee0bd26fc38ff290f16ea))
+* **tests:** correctly skip create fine tune tests ([#138](https://github.com/togethercomputer/together-py/issues/138)) ([47c9cae](https://github.com/togethercomputer/together-py/commit/47c9cae7da9caee8de3ba7480b784fc5d168e1b0))
+* **types:** handle more discriminated union shapes ([#126](https://github.com/togethercomputer/together-py/issues/126)) ([2483c76](https://github.com/togethercomputer/together-py/commit/2483c76ee0cf06ee7a1819446cfa4fa349958da4))
+
+
+### Chores
+
+* broadly detect json family of content-type headers ([6e2421e](https://github.com/togethercomputer/together-py/commit/6e2421e126e74b4bcc7bc2aaef07a078bdd1e0ea))
+* **ci:** add timeout thresholds for CI jobs ([2425c53](https://github.com/togethercomputer/together-py/commit/2425c53723d34959380d44131d607ded5a665004))
+* **ci:** only use depot for staging repos ([2dfe569](https://github.com/togethercomputer/together-py/commit/2dfe569cf72f74a97fbe1e282c9d079c371d32aa))
+* **ci:** run on more branches and use depot runners ([3c61f56](https://github.com/togethercomputer/together-py/commit/3c61f565633c395dba16fda924c241910145c13c))
+* **client:** minor internal fixes ([f6f5174](https://github.com/togethercomputer/together-py/commit/f6f5174c6ec0b9a3a4decfc25737efbbb52bffe5))
+* fix typos ([#131](https://github.com/togethercomputer/together-py/issues/131)) ([dedf3ad](https://github.com/togethercomputer/together-py/commit/dedf3adb709255ba9303e29354b013db8a8520b9))
+* **internal:** avoid errors for isinstance checks on proxies ([8b81509](https://github.com/togethercomputer/together-py/commit/8b81509faac153ee4a33b3460c17759e2465dfcd))
+* **internal:** base client updates ([890efc3](https://github.com/togethercomputer/together-py/commit/890efc36f00553025237601bad51f3f0a906376b))
+* **internal:** bump pyright version ([01e104a](https://github.com/togethercomputer/together-py/commit/01e104a2bba92c77ef610cf48720d8a2785ff39b))
+* **internal:** bump rye to 0.44.0 ([#124](https://github.com/togethercomputer/together-py/issues/124)) ([e8c3dc3](https://github.com/togethercomputer/together-py/commit/e8c3dc3be0e56d7c4e7a48d8f824a88878e0c981))
+* **internal:** codegen related update ([#125](https://github.com/togethercomputer/together-py/issues/125)) ([5e83e04](https://github.com/togethercomputer/together-py/commit/5e83e043b3f62c38fa13c72d54278e845c2df46a))
+* **internal:** expand CI branch coverage ([#139](https://github.com/togethercomputer/together-py/issues/139)) ([2db8ca2](https://github.com/togethercomputer/together-py/commit/2db8ca2b6d063b136e9cb50c3991a11f6f47e4fb))
+* **internal:** fix list file params ([8a8dcd3](https://github.com/togethercomputer/together-py/commit/8a8dcd384e480c52358460ba662a48311a415cfb))
+* **internal:** import reformatting ([49f361b](https://github.com/togethercomputer/together-py/commit/49f361bf9d548ca45a01e31972b5db797752e481))
+* **internal:** minor formatting changes ([33e3a75](https://github.com/togethercomputer/together-py/commit/33e3a751bd9f3382e5e462bbcf92a212e14d26ff))
+* **internal:** reduce CI branch coverage ([6f6ac97](https://github.com/togethercomputer/together-py/commit/6f6ac973e36bdeb28883ff6281228c67f76c55a1))
+* **internal:** refactor retries to not use recursion ([ffb0eb4](https://github.com/togethercomputer/together-py/commit/ffb0eb46712544a86f01eaa842ac13f085e37fee))
+* **internal:** remove extra empty newlines ([#122](https://github.com/togethercomputer/together-py/issues/122)) ([b0cbbaa](https://github.com/togethercomputer/together-py/commit/b0cbbaa10e003e84cf2c8c23ef05baa6bc9d4e82))
+* **internal:** remove trailing character ([#134](https://github.com/togethercomputer/together-py/issues/134)) ([f09c6cb](https://github.com/togethercomputer/together-py/commit/f09c6cb1620997e72b99bc918d77ae9a2be9e8b3))
+* **internal:** slight transform perf improvement ([#136](https://github.com/togethercomputer/together-py/issues/136)) ([d31383c](https://github.com/togethercomputer/together-py/commit/d31383c0f8fb1749381fad871aa60bd0eaad3e03))
+* **internal:** update models test ([b64d4cc](https://github.com/togethercomputer/together-py/commit/b64d4cc9a1424fa7f46088e51306b877afba3fae))
+* **internal:** update pyright settings ([05720d5](https://github.com/togethercomputer/together-py/commit/05720d5b0b7387fbe3b04975dfa6b764898a7a02))
+* **tests:** improve enum examples ([#137](https://github.com/togethercomputer/together-py/issues/137)) ([4c3e75d](https://github.com/togethercomputer/together-py/commit/4c3e75d5aa75421d4aca257c0df89d24e2db264e))
+
+
+### Documentation
+
+* revise readme docs about nested params ([#118](https://github.com/togethercomputer/together-py/issues/118)) ([0eefffd](https://github.com/togethercomputer/together-py/commit/0eefffd623bc692f2e03fd299b9b05c3bb88bf53))
+
## 0.1.0-alpha.3 (2025-03-05)
Full Changelog: [v0.1.0-alpha.2...v0.1.0-alpha.3](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.2...v0.1.0-alpha.3)
diff --git a/README.md b/README.md
index 1c6e44f5..fa7c96af 100644
--- a/README.md
+++ b/README.md
@@ -136,6 +136,31 @@ Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typ
Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`.
+## Nested params
+
+Nested parameters are dictionaries, typed using `TypedDict`, for example:
+
+```python
+from together import Together
+
+client = Together()
+
+chat_completion = client.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ response_format={
+ "schema": {"foo": "bar"},
+ "type": "json",
+ },
+)
+print(chat_completion.response_format)
+```
+
## Handling errors
When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `together.APIConnectionError` is raised.
diff --git a/api.md b/api.md
index 079efc3b..d1cb335c 100644
--- a/api.md
+++ b/api.md
@@ -86,6 +86,30 @@ Methods:
- client.fine_tune.download(\*\*params) -> FineTuneDownloadResponse
- client.fine_tune.list_events(id) -> FineTuneEvent
+# CodeInterpreter
+
+Types:
+
+```python
+from together.types import ExecuteResponse
+```
+
+Methods:
+
+- client.code_interpreter.execute(\*\*params) -> ExecuteResponse
+
+## Sessions
+
+Types:
+
+```python
+from together.types.code_interpreter import SessionListResponse
+```
+
+Methods:
+
+- client.code_interpreter.sessions.list() -> SessionListResponse
+
# Images
Types:
@@ -115,9 +139,56 @@ Methods:
Types:
```python
-from together.types import ModelListResponse
+from together.types import ModelListResponse, ModelUploadResponse
```
Methods:
- client.models.list() -> ModelListResponse
+- client.models.upload(\*\*params) -> ModelUploadResponse
+
+# Jobs
+
+Types:
+
+```python
+from together.types import JobRetrieveResponse, JobListResponse
+```
+
+Methods:
+
+- client.jobs.retrieve(job_id) -> JobRetrieveResponse
+- client.jobs.list() -> JobListResponse
+
+# Endpoints
+
+Types:
+
+```python
+from together.types import (
+ EndpointCreateResponse,
+ EndpointRetrieveResponse,
+ EndpointUpdateResponse,
+ EndpointListResponse,
+)
+```
+
+Methods:
+
+- client.endpoints.create(\*\*params) -> EndpointCreateResponse
+- client.endpoints.retrieve(endpoint_id) -> EndpointRetrieveResponse
+- client.endpoints.update(endpoint_id, \*\*params) -> EndpointUpdateResponse
+- client.endpoints.list(\*\*params) -> EndpointListResponse
+- client.endpoints.delete(endpoint_id) -> None
+
+# Hardware
+
+Types:
+
+```python
+from together.types import HardwareListResponse
+```
+
+Methods:
+
+- client.hardware.list(\*\*params) -> HardwareListResponse
diff --git a/bin/publish-pypi b/bin/publish-pypi
index 05bfccbb..826054e9 100644
--- a/bin/publish-pypi
+++ b/bin/publish-pypi
@@ -3,7 +3,4 @@
set -eux
mkdir -p dist
rye build --clean
-# Patching importlib-metadata version until upstream library version is updated
-# https://github.com/pypa/twine/issues/977#issuecomment-2189800841
-"$HOME/.rye/self/bin/python3" -m pip install 'importlib-metadata==7.2.1'
rye publish --yes --token=$PYPI_TOKEN
diff --git a/pyproject.toml b/pyproject.toml
index 6abb7a59..943b01f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "together"
-version = "0.1.0-alpha.3"
+version = "0.1.0-alpha.4"
description = "The official Python library for the together API"
dynamic = ["readme"]
license = "Apache-2.0"
@@ -50,7 +50,7 @@ together = "together.lib.cli.cli:main"
managed = true
# version pins are in requirements-dev.lock
dev-dependencies = [
- "pyright>=1.1.359",
+ "pyright==1.1.399",
"mypy",
"respx",
"pytest",
@@ -94,7 +94,7 @@ typecheck = { chain = [
"typecheck:mypy" = "mypy ."
[build-system]
-requires = ["hatchling", "hatch-fancy-pypi-readme"]
+requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"]
build-backend = "hatchling.build"
[tool.hatch.build]
@@ -155,11 +155,11 @@ exclude = [
]
reportImplicitOverride = true
+reportOverlappingOverload = false
reportImportCycles = false
reportPrivateUsage = false
-
[tool.ruff]
line-length = 120
output-format = "grouped"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 76cd6127..addb0f1e 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -7,6 +7,7 @@
# all-features: true
# with-sources: false
# generate-hashes: false
+# universal: false
-e file:.
annotated-types==0.6.0
@@ -80,7 +81,7 @@ pydantic-core==2.27.1
# via pydantic
pygments==2.18.0
# via rich
-pyright==1.1.392.post0
+pyright==1.1.399
pytest==8.3.3
# via pytest-asyncio
pytest-asyncio==0.24.0
diff --git a/requirements.lock b/requirements.lock
index 04389e74..1d8f9198 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -7,6 +7,7 @@
# all-features: true
# with-sources: false
# generate-hashes: false
+# universal: false
-e file:.
annotated-types==0.6.0
diff --git a/scripts/test b/scripts/test
index 4fa5698b..2b878456 100755
--- a/scripts/test
+++ b/scripts/test
@@ -52,6 +52,8 @@ else
echo
fi
+export DEFER_PYDANTIC_BUILD=false
+
echo "==> Running tests"
rye run pytest "$@"
diff --git a/src/together/__init__.py b/src/together/__init__.py
index bbaaebad..8adebcd9 100644
--- a/src/together/__init__.py
+++ b/src/together/__init__.py
@@ -1,5 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+import typing as _t
+
from . import types
from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes
from ._utils import file_from_path
@@ -78,6 +80,9 @@
"DefaultAsyncHttpxClient",
]
+if not _t.TYPE_CHECKING:
+ from ._utils._resources_proxy import resources as resources
+
_setup_logging()
# Update the __module__ attribute for exported symbols so that
diff --git a/src/together/_base_client.py b/src/together/_base_client.py
index 2e2c8939..ee2f5115 100644
--- a/src/together/_base_client.py
+++ b/src/together/_base_client.py
@@ -98,7 +98,11 @@
_AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any])
if TYPE_CHECKING:
- from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+ from httpx._config import (
+ DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage]
+ )
+
+ HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG
else:
try:
from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
@@ -115,6 +119,7 @@ class PageInfo:
url: URL | NotGiven
params: Query | NotGiven
+ json: Body | NotGiven
@overload
def __init__(
@@ -130,19 +135,30 @@ def __init__(
params: Query,
) -> None: ...
+ @overload
+ def __init__(
+ self,
+ *,
+ json: Body,
+ ) -> None: ...
+
def __init__(
self,
*,
url: URL | NotGiven = NOT_GIVEN,
+ json: Body | NotGiven = NOT_GIVEN,
params: Query | NotGiven = NOT_GIVEN,
) -> None:
self.url = url
+ self.json = json
self.params = params
@override
def __repr__(self) -> str:
if self.url:
return f"{self.__class__.__name__}(url={self.url})"
+ if self.json:
+ return f"{self.__class__.__name__}(json={self.json})"
return f"{self.__class__.__name__}(params={self.params})"
@@ -191,6 +207,19 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions:
options.url = str(url)
return options
+ if not isinstance(info.json, NotGiven):
+ if not is_mapping(info.json):
+ raise TypeError("Pagination is only supported with mappings")
+
+ if not options.json_data:
+ options.json_data = {**info.json}
+ else:
+ if not is_mapping(options.json_data):
+ raise TypeError("Pagination is only supported with mappings")
+
+ options.json_data = {**options.json_data, **info.json}
+ return options
+
raise ValueError("Unexpected PageInfo state")
@@ -408,8 +437,8 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0
headers = httpx.Headers(headers_dict)
idempotency_header = self._idempotency_header
- if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
- headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
+ if idempotency_header and options.idempotency_key and idempotency_header not in headers:
+ headers[idempotency_header] = options.idempotency_key
# Don't set these headers if they were already set or removed by the caller. We check
# `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
@@ -873,7 +902,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[True],
stream_cls: Type[_StreamT],
@@ -884,7 +912,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[False] = False,
) -> ResponseT: ...
@@ -894,7 +921,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: Type[_StreamT] | None = None,
@@ -904,121 +930,109 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: type[_StreamT] | None = None,
) -> ResponseT | _StreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
+ cast_to = self._maybe_override_cast_to(cast_to, options)
- def _request(
- self,
- *,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- retries_taken: int,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = self._prepare_options(options)
-
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- self._prepare_request(request)
-
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- log.debug("Sending HTTP Request: %s %s", request.method, request.url)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = self._prepare_options(options)
- try:
- response = self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ self._prepare_request(request)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
-
- log.debug(
- 'HTTP Response: %s %s "%i %s" %s',
- request.method,
- request.url,
- response.status_code,
- response.reason_phrase,
- response.headers,
- )
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ err.response.close()
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- err.response.close()
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ err.response.read()
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- err.response.read()
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return self._process_response(
cast_to=cast_to,
options=options,
@@ -1028,37 +1042,20 @@ def _request(
retries_taken=retries_taken,
)
- def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
- # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
- # different thread if necessary.
time.sleep(timeout)
- return self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
def _process_response(
self,
*,
@@ -1402,7 +1399,6 @@ async def request(
options: FinalRequestOptions,
*,
stream: Literal[False] = False,
- remaining_retries: Optional[int] = None,
) -> ResponseT: ...
@overload
@@ -1413,7 +1409,6 @@ async def request(
*,
stream: Literal[True],
stream_cls: type[_AsyncStreamT],
- remaining_retries: Optional[int] = None,
) -> _AsyncStreamT: ...
@overload
@@ -1424,7 +1419,6 @@ async def request(
*,
stream: bool,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
) -> ResponseT | _AsyncStreamT: ...
async def request(
@@ -1434,116 +1428,111 @@ async def request(
*,
stream: bool = False,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
- ) -> ResponseT | _AsyncStreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return await self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
-
- async def _request(
- self,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- *,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- retries_taken: int,
) -> ResponseT | _AsyncStreamT:
if self._platform is None:
# `get_platform` can make blocking IO calls so we
# execute it earlier while we are in an async context
self._platform = await asyncify(get_platform)()
+ cast_to = self._maybe_override_cast_to(cast_to, options)
+
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = await self._prepare_options(options)
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- await self._prepare_request(request)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = await self._prepare_options(options)
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ await self._prepare_request(request)
- try:
- response = await self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
-
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = await self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ await err.response.aclose()
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- log.debug(
- 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ await err.response.aread()
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- await err.response.aclose()
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- await err.response.aread()
-
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return await self._process_response(
cast_to=cast_to,
options=options,
@@ -1553,35 +1542,20 @@ async def _request(
retries_taken=retries_taken,
)
- async def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- ) -> ResponseT | _AsyncStreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ async def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
await anyio.sleep(timeout)
- return await self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
async def _process_response(
self,
*,
diff --git a/src/together/_client.py b/src/together/_client.py
index a37edcc1..f2e64aed 100644
--- a/src/together/_client.py
+++ b/src/together/_client.py
@@ -36,7 +36,7 @@
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from .resources import audio, files, images, models, fine_tune, embeddings, completions
+from .resources import jobs, audio, files, images, models, hardware, endpoints, fine_tune, embeddings, completions
from ._streaming import Stream as Stream, AsyncStream as AsyncStream
from ._exceptions import TogetherError, APIStatusError
from ._base_client import (
@@ -47,6 +47,7 @@
)
from .resources.chat import chat
from .types.rerank_response import RerankResponse
+from .resources.code_interpreter import code_interpreter
__all__ = [
"Timeout",
@@ -66,9 +67,13 @@ class Together(SyncAPIClient):
embeddings: embeddings.EmbeddingsResource
files: files.FilesResource
fine_tune: fine_tune.FineTuneResource
+ code_interpreter: code_interpreter.CodeInterpreterResource
images: images.ImagesResource
audio: audio.AudioResource
models: models.ModelsResource
+ jobs: jobs.JobsResource
+ endpoints: endpoints.EndpointsResource
+ hardware: hardware.HardwareResource
with_raw_response: TogetherWithRawResponse
with_streaming_response: TogetherWithStreamedResponse
@@ -133,9 +138,13 @@ def __init__(
self.embeddings = embeddings.EmbeddingsResource(self)
self.files = files.FilesResource(self)
self.fine_tune = fine_tune.FineTuneResource(self)
+ self.code_interpreter = code_interpreter.CodeInterpreterResource(self)
self.images = images.ImagesResource(self)
self.audio = audio.AudioResource(self)
self.models = models.ModelsResource(self)
+ self.jobs = jobs.JobsResource(self)
+ self.endpoints = endpoints.EndpointsResource(self)
+ self.hardware = hardware.HardwareResource(self)
self.with_raw_response = TogetherWithRawResponse(self)
self.with_streaming_response = TogetherWithStreamedResponse(self)
@@ -312,9 +321,13 @@ class AsyncTogether(AsyncAPIClient):
embeddings: embeddings.AsyncEmbeddingsResource
files: files.AsyncFilesResource
fine_tune: fine_tune.AsyncFineTuneResource
+ code_interpreter: code_interpreter.AsyncCodeInterpreterResource
images: images.AsyncImagesResource
audio: audio.AsyncAudioResource
models: models.AsyncModelsResource
+ jobs: jobs.AsyncJobsResource
+ endpoints: endpoints.AsyncEndpointsResource
+ hardware: hardware.AsyncHardwareResource
with_raw_response: AsyncTogetherWithRawResponse
with_streaming_response: AsyncTogetherWithStreamedResponse
@@ -379,9 +392,13 @@ def __init__(
self.embeddings = embeddings.AsyncEmbeddingsResource(self)
self.files = files.AsyncFilesResource(self)
self.fine_tune = fine_tune.AsyncFineTuneResource(self)
+ self.code_interpreter = code_interpreter.AsyncCodeInterpreterResource(self)
self.images = images.AsyncImagesResource(self)
self.audio = audio.AsyncAudioResource(self)
self.models = models.AsyncModelsResource(self)
+ self.jobs = jobs.AsyncJobsResource(self)
+ self.endpoints = endpoints.AsyncEndpointsResource(self)
+ self.hardware = hardware.AsyncHardwareResource(self)
self.with_raw_response = AsyncTogetherWithRawResponse(self)
self.with_streaming_response = AsyncTogetherWithStreamedResponse(self)
@@ -559,9 +576,13 @@ def __init__(self, client: Together) -> None:
self.embeddings = embeddings.EmbeddingsResourceWithRawResponse(client.embeddings)
self.files = files.FilesResourceWithRawResponse(client.files)
self.fine_tune = fine_tune.FineTuneResourceWithRawResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.CodeInterpreterResourceWithRawResponse(client.code_interpreter)
self.images = images.ImagesResourceWithRawResponse(client.images)
self.audio = audio.AudioResourceWithRawResponse(client.audio)
self.models = models.ModelsResourceWithRawResponse(client.models)
+ self.jobs = jobs.JobsResourceWithRawResponse(client.jobs)
+ self.endpoints = endpoints.EndpointsResourceWithRawResponse(client.endpoints)
+ self.hardware = hardware.HardwareResourceWithRawResponse(client.hardware)
self.rerank = to_raw_response_wrapper(
client.rerank,
@@ -575,9 +596,13 @@ def __init__(self, client: AsyncTogether) -> None:
self.embeddings = embeddings.AsyncEmbeddingsResourceWithRawResponse(client.embeddings)
self.files = files.AsyncFilesResourceWithRawResponse(client.files)
self.fine_tune = fine_tune.AsyncFineTuneResourceWithRawResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.AsyncCodeInterpreterResourceWithRawResponse(client.code_interpreter)
self.images = images.AsyncImagesResourceWithRawResponse(client.images)
self.audio = audio.AsyncAudioResourceWithRawResponse(client.audio)
self.models = models.AsyncModelsResourceWithRawResponse(client.models)
+ self.jobs = jobs.AsyncJobsResourceWithRawResponse(client.jobs)
+ self.endpoints = endpoints.AsyncEndpointsResourceWithRawResponse(client.endpoints)
+ self.hardware = hardware.AsyncHardwareResourceWithRawResponse(client.hardware)
self.rerank = async_to_raw_response_wrapper(
client.rerank,
@@ -591,9 +616,13 @@ def __init__(self, client: Together) -> None:
self.embeddings = embeddings.EmbeddingsResourceWithStreamingResponse(client.embeddings)
self.files = files.FilesResourceWithStreamingResponse(client.files)
self.fine_tune = fine_tune.FineTuneResourceWithStreamingResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.CodeInterpreterResourceWithStreamingResponse(client.code_interpreter)
self.images = images.ImagesResourceWithStreamingResponse(client.images)
self.audio = audio.AudioResourceWithStreamingResponse(client.audio)
self.models = models.ModelsResourceWithStreamingResponse(client.models)
+ self.jobs = jobs.JobsResourceWithStreamingResponse(client.jobs)
+ self.endpoints = endpoints.EndpointsResourceWithStreamingResponse(client.endpoints)
+ self.hardware = hardware.HardwareResourceWithStreamingResponse(client.hardware)
self.rerank = to_streamed_response_wrapper(
client.rerank,
@@ -607,9 +636,15 @@ def __init__(self, client: AsyncTogether) -> None:
self.embeddings = embeddings.AsyncEmbeddingsResourceWithStreamingResponse(client.embeddings)
self.files = files.AsyncFilesResourceWithStreamingResponse(client.files)
self.fine_tune = fine_tune.AsyncFineTuneResourceWithStreamingResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.AsyncCodeInterpreterResourceWithStreamingResponse(
+ client.code_interpreter
+ )
self.images = images.AsyncImagesResourceWithStreamingResponse(client.images)
self.audio = audio.AsyncAudioResourceWithStreamingResponse(client.audio)
self.models = models.AsyncModelsResourceWithStreamingResponse(client.models)
+ self.jobs = jobs.AsyncJobsResourceWithStreamingResponse(client.jobs)
+ self.endpoints = endpoints.AsyncEndpointsResourceWithStreamingResponse(client.endpoints)
+ self.hardware = hardware.AsyncHardwareResourceWithStreamingResponse(client.hardware)
self.rerank = async_to_streamed_response_wrapper(
client.rerank,
diff --git a/src/together/_models.py b/src/together/_models.py
index c4401ff8..798956f1 100644
--- a/src/together/_models.py
+++ b/src/together/_models.py
@@ -19,7 +19,6 @@
)
import pydantic
-import pydantic.generics
from pydantic.fields import FieldInfo
from ._types import (
@@ -65,7 +64,7 @@
from ._constants import RAW_RESPONSE_HEADER
if TYPE_CHECKING:
- from pydantic_core.core_schema import ModelField, LiteralSchema, ModelFieldsSchema
+ from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema
__all__ = ["BaseModel", "GenericModel"]
@@ -627,8 +626,8 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any,
# Note: if one variant defines an alias then they all should
discriminator_alias = field_info.alias
- if field_info.annotation and is_literal_type(field_info.annotation):
- for entry in get_args(field_info.annotation):
+ if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation):
+ for entry in get_args(annotation):
if isinstance(entry, str):
mapping[entry] = variant
@@ -646,15 +645,18 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any,
def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None:
schema = model.__pydantic_core_schema__
+ if schema["type"] == "definitions":
+ schema = schema["schema"]
+
if schema["type"] != "model":
return None
+ schema = cast("ModelSchema", schema)
fields_schema = schema["schema"]
if fields_schema["type"] != "model-fields":
return None
fields_schema = cast("ModelFieldsSchema", fields_schema)
-
field = fields_schema["fields"].get(field_name)
if not field:
return None
@@ -678,7 +680,7 @@ def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None:
setattr(typ, "__pydantic_config__", config) # noqa: B010
-# our use of subclasssing here causes weirdness for type checkers,
+# our use of subclassing here causes weirdness for type checkers,
# so we just pretend that we don't subclass
if TYPE_CHECKING:
GenericModel = BaseModel
diff --git a/src/together/_response.py b/src/together/_response.py
index fb38392e..7188c3e4 100644
--- a/src/together/_response.py
+++ b/src/together/_response.py
@@ -233,7 +233,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
# split is required to handle cases where additional information is included
# in the response, e.g. application/json; charset=utf-8
content_type, *_ = response.headers.get("content-type", "*").split(";")
- if content_type != "application/json":
+ if not content_type.endswith("json"):
if is_basemodel(cast_to):
try:
data = response.json()
diff --git a/src/together/_utils/_proxy.py b/src/together/_utils/_proxy.py
index ffd883e9..0f239a33 100644
--- a/src/together/_utils/_proxy.py
+++ b/src/together/_utils/_proxy.py
@@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]:
@property # type: ignore
@override
def __class__(self) -> type: # pyright: ignore
- proxied = self.__get_proxied__()
+ try:
+ proxied = self.__get_proxied__()
+ except Exception:
+ return type(self)
if issubclass(type(proxied), LazyProxy):
return type(proxied)
return proxied.__class__
diff --git a/src/together/_utils/_resources_proxy.py b/src/together/_utils/_resources_proxy.py
new file mode 100644
index 00000000..68dd27e7
--- /dev/null
+++ b/src/together/_utils/_resources_proxy.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import Any
+from typing_extensions import override
+
+from ._proxy import LazyProxy
+
+
+class ResourcesProxy(LazyProxy[Any]):
+ """A proxy for the `together.resources` module.
+
+ This is used so that we can lazily import `together.resources` only when
+ needed *and* so that users can just import `together` and reference `together.resources`
+ """
+
+ @override
+ def __load__(self) -> Any:
+ import importlib
+
+ mod = importlib.import_module("together.resources")
+ return mod
+
+
+resources = ResourcesProxy().__as_proxied__()
diff --git a/src/together/_utils/_transform.py b/src/together/_utils/_transform.py
index 18afd9d8..b0cc20a7 100644
--- a/src/together/_utils/_transform.py
+++ b/src/together/_utils/_transform.py
@@ -5,13 +5,15 @@
import pathlib
from typing import Any, Mapping, TypeVar, cast
from datetime import date, datetime
-from typing_extensions import Literal, get_args, override, get_type_hints
+from typing_extensions import Literal, get_args, override, get_type_hints as _get_type_hints
import anyio
import pydantic
from ._utils import (
is_list,
+ is_given,
+ lru_cache,
is_mapping,
is_iterable,
)
@@ -108,6 +110,7 @@ class Params(TypedDict, total=False):
return cast(_T, transformed)
+@lru_cache(maxsize=8096)
def _get_annotated_type(type_: type) -> type | None:
"""If the given type is an `Annotated` type then it is returned, if not `None` is returned.
@@ -126,7 +129,7 @@ def _get_annotated_type(type_: type) -> type | None:
def _maybe_transform_key(key: str, type_: type) -> str:
"""Transform the given `data` based on the annotations provided in `type_`.
- Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata.
+ Note: this function only looks at `Annotated` types that contain `PropertyInfo` metadata.
"""
annotated_type = _get_annotated_type(type_)
if annotated_type is None:
@@ -142,6 +145,10 @@ def _maybe_transform_key(key: str, type_: type) -> str:
return key
+def _no_transform_needed(annotation: type) -> bool:
+ return annotation == float or annotation == int
+
+
def _transform_recursive(
data: object,
*,
@@ -184,6 +191,15 @@ def _transform_recursive(
return cast(object, data)
inner_type = extract_type_arg(stripped_type, 0)
+ if _no_transform_needed(inner_type):
+ # for some types there is no need to transform anything, so we can get a small
+ # perf boost from skipping that work.
+ #
+ # but we still need to convert to a list to ensure the data is json-serializable
+ if is_list(data):
+ return data
+ return list(data)
+
return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
if is_union_type(stripped_type):
@@ -245,6 +261,11 @@ def _transform_typeddict(
result: dict[str, object] = {}
annotations = get_type_hints(expected_type, include_extras=True)
for key, value in data.items():
+ if not is_given(value):
+ # we don't need to include `NotGiven` values here as they'll
+ # be stripped out before the request is sent anyway
+ continue
+
type_ = annotations.get(key)
if type_ is None:
# we do not have a type annotation for this field, leave it as is
@@ -332,6 +353,15 @@ async def _async_transform_recursive(
return cast(object, data)
inner_type = extract_type_arg(stripped_type, 0)
+ if _no_transform_needed(inner_type):
+ # for some types there is no need to transform anything, so we can get a small
+ # perf boost from skipping that work.
+ #
+ # but we still need to convert to a list to ensure the data is json-serializable
+ if is_list(data):
+ return data
+ return list(data)
+
return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
if is_union_type(stripped_type):
@@ -393,6 +423,11 @@ async def _async_transform_typeddict(
result: dict[str, object] = {}
annotations = get_type_hints(expected_type, include_extras=True)
for key, value in data.items():
+ if not is_given(value):
+ # we don't need to include `NotGiven` values here as they'll
+ # be stripped out before the request is sent anyway
+ continue
+
type_ = annotations.get(key)
if type_ is None:
# we do not have a type annotation for this field, leave it as is
@@ -400,3 +435,13 @@ async def _async_transform_typeddict(
else:
result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_)
return result
+
+
+@lru_cache(maxsize=8096)
+def get_type_hints(
+ obj: Any,
+ globalns: dict[str, Any] | None = None,
+ localns: Mapping[str, Any] | None = None,
+ include_extras: bool = False,
+) -> dict[str, Any]:
+ return _get_type_hints(obj, globalns=globalns, localns=localns, include_extras=include_extras)
diff --git a/src/together/_utils/_typing.py b/src/together/_utils/_typing.py
index 278749b1..1bac9542 100644
--- a/src/together/_utils/_typing.py
+++ b/src/together/_utils/_typing.py
@@ -13,6 +13,7 @@
get_origin,
)
+from ._utils import lru_cache
from .._types import InheritsGeneric
from .._compat import is_union as _is_union
@@ -66,6 +67,7 @@ def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]:
# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]]
+@lru_cache(maxsize=8096)
def strip_annotated_type(typ: type) -> type:
if is_required_type(typ) or is_annotated_type(typ):
return strip_annotated_type(cast(type, get_args(typ)[0]))
@@ -108,7 +110,7 @@ class MyResponse(Foo[_T]):
```
"""
cls = cast(object, get_origin(typ) or typ)
- if cls in generic_bases:
+ if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains]
# we're given the class directly
return extract_type_arg(typ, index)
diff --git a/src/together/_utils/_utils.py b/src/together/_utils/_utils.py
index e5811bba..ea3cf3f2 100644
--- a/src/together/_utils/_utils.py
+++ b/src/together/_utils/_utils.py
@@ -72,8 +72,16 @@ def _extract_items(
from .._files import assert_is_file_content
# We have exhausted the path, return the entry we found.
- assert_is_file_content(obj, key=flattened_key)
assert flattened_key is not None
+
+ if is_list(obj):
+ files: list[tuple[str, FileTypes]] = []
+ for entry in obj:
+ assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "")
+ files.append((flattened_key + "[]", cast(FileTypes, entry)))
+ return files
+
+ assert_is_file_content(obj, key=flattened_key)
return [(flattened_key, cast(FileTypes, obj))]
index += 1
diff --git a/src/together/_version.py b/src/together/_version.py
index 149ef267..2b3d6409 100644
--- a/src/together/_version.py
+++ b/src/together/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "together"
-__version__ = "0.1.0-alpha.3" # x-release-please-version
+__version__ = "0.1.0-alpha.4" # x-release-please-version
diff --git a/src/together/resources/__init__.py b/src/together/resources/__init__.py
index 14f5ae11..bd3e4c51 100644
--- a/src/together/resources/__init__.py
+++ b/src/together/resources/__init__.py
@@ -8,6 +8,14 @@
ChatResourceWithStreamingResponse,
AsyncChatResourceWithStreamingResponse,
)
+from .jobs import (
+ JobsResource,
+ AsyncJobsResource,
+ JobsResourceWithRawResponse,
+ AsyncJobsResourceWithRawResponse,
+ JobsResourceWithStreamingResponse,
+ AsyncJobsResourceWithStreamingResponse,
+)
from .audio import (
AudioResource,
AsyncAudioResource,
@@ -40,6 +48,22 @@
ModelsResourceWithStreamingResponse,
AsyncModelsResourceWithStreamingResponse,
)
+from .hardware import (
+ HardwareResource,
+ AsyncHardwareResource,
+ HardwareResourceWithRawResponse,
+ AsyncHardwareResourceWithRawResponse,
+ HardwareResourceWithStreamingResponse,
+ AsyncHardwareResourceWithStreamingResponse,
+)
+from .endpoints import (
+ EndpointsResource,
+ AsyncEndpointsResource,
+ EndpointsResourceWithRawResponse,
+ AsyncEndpointsResourceWithRawResponse,
+ EndpointsResourceWithStreamingResponse,
+ AsyncEndpointsResourceWithStreamingResponse,
+)
from .fine_tune import (
FineTuneResource,
AsyncFineTuneResource,
@@ -64,6 +88,14 @@
CompletionsResourceWithStreamingResponse,
AsyncCompletionsResourceWithStreamingResponse,
)
+from .code_interpreter import (
+ CodeInterpreterResource,
+ AsyncCodeInterpreterResource,
+ CodeInterpreterResourceWithRawResponse,
+ AsyncCodeInterpreterResourceWithRawResponse,
+ CodeInterpreterResourceWithStreamingResponse,
+ AsyncCodeInterpreterResourceWithStreamingResponse,
+)
__all__ = [
"ChatResource",
@@ -96,6 +128,12 @@
"AsyncFineTuneResourceWithRawResponse",
"FineTuneResourceWithStreamingResponse",
"AsyncFineTuneResourceWithStreamingResponse",
+ "CodeInterpreterResource",
+ "AsyncCodeInterpreterResource",
+ "CodeInterpreterResourceWithRawResponse",
+ "AsyncCodeInterpreterResourceWithRawResponse",
+ "CodeInterpreterResourceWithStreamingResponse",
+ "AsyncCodeInterpreterResourceWithStreamingResponse",
"ImagesResource",
"AsyncImagesResource",
"ImagesResourceWithRawResponse",
@@ -114,4 +152,22 @@
"AsyncModelsResourceWithRawResponse",
"ModelsResourceWithStreamingResponse",
"AsyncModelsResourceWithStreamingResponse",
+ "JobsResource",
+ "AsyncJobsResource",
+ "JobsResourceWithRawResponse",
+ "AsyncJobsResourceWithRawResponse",
+ "JobsResourceWithStreamingResponse",
+ "AsyncJobsResourceWithStreamingResponse",
+ "EndpointsResource",
+ "AsyncEndpointsResource",
+ "EndpointsResourceWithRawResponse",
+ "AsyncEndpointsResourceWithRawResponse",
+ "EndpointsResourceWithStreamingResponse",
+ "AsyncEndpointsResourceWithStreamingResponse",
+ "HardwareResource",
+ "AsyncHardwareResource",
+ "HardwareResourceWithRawResponse",
+ "AsyncHardwareResourceWithRawResponse",
+ "HardwareResourceWithStreamingResponse",
+ "AsyncHardwareResourceWithStreamingResponse",
]
diff --git a/src/together/resources/audio.py b/src/together/resources/audio.py
index c21fc754..7a8b2455 100644
--- a/src/together/resources/audio.py
+++ b/src/together/resources/audio.py
@@ -9,10 +9,7 @@
from ..types import audio_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py
index be27ee86..9b9b413a 100644
--- a/src/together/resources/chat/completions.py
+++ b/src/together/resources/chat/completions.py
@@ -8,11 +8,7 @@
import httpx
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import required_args, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -515,7 +511,9 @@ def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -1010,7 +1008,9 @@ async def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
diff --git a/src/together/resources/code_interpreter/__init__.py b/src/together/resources/code_interpreter/__init__.py
new file mode 100644
index 00000000..d5e30afe
--- /dev/null
+++ b/src/together/resources/code_interpreter/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .sessions import (
+ SessionsResource,
+ AsyncSessionsResource,
+ SessionsResourceWithRawResponse,
+ AsyncSessionsResourceWithRawResponse,
+ SessionsResourceWithStreamingResponse,
+ AsyncSessionsResourceWithStreamingResponse,
+)
+from .code_interpreter import (
+ CodeInterpreterResource,
+ AsyncCodeInterpreterResource,
+ CodeInterpreterResourceWithRawResponse,
+ AsyncCodeInterpreterResourceWithRawResponse,
+ CodeInterpreterResourceWithStreamingResponse,
+ AsyncCodeInterpreterResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "SessionsResource",
+ "AsyncSessionsResource",
+ "SessionsResourceWithRawResponse",
+ "AsyncSessionsResourceWithRawResponse",
+ "SessionsResourceWithStreamingResponse",
+ "AsyncSessionsResourceWithStreamingResponse",
+ "CodeInterpreterResource",
+ "AsyncCodeInterpreterResource",
+ "CodeInterpreterResourceWithRawResponse",
+ "AsyncCodeInterpreterResourceWithRawResponse",
+ "CodeInterpreterResourceWithStreamingResponse",
+ "AsyncCodeInterpreterResourceWithStreamingResponse",
+]
diff --git a/src/together/resources/code_interpreter/code_interpreter.py b/src/together/resources/code_interpreter/code_interpreter.py
new file mode 100644
index 00000000..8a9b777d
--- /dev/null
+++ b/src/together/resources/code_interpreter/code_interpreter.py
@@ -0,0 +1,258 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, Iterable, cast
+from typing_extensions import Literal
+
+import httpx
+
+from ...types import code_interpreter_execute_params
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform, async_maybe_transform
+from .sessions import (
+ SessionsResource,
+ AsyncSessionsResource,
+ SessionsResourceWithRawResponse,
+ AsyncSessionsResourceWithRawResponse,
+ SessionsResourceWithStreamingResponse,
+ AsyncSessionsResourceWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.execute_response import ExecuteResponse
+
+__all__ = ["CodeInterpreterResource", "AsyncCodeInterpreterResource"]
+
+
+class CodeInterpreterResource(SyncAPIResource):
+ @cached_property
+ def sessions(self) -> SessionsResource:
+ return SessionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> CodeInterpreterResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return CodeInterpreterResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CodeInterpreterResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return CodeInterpreterResourceWithStreamingResponse(self)
+
+ def execute(
+ self,
+ *,
+ code: str,
+ language: Literal["python"],
+ files: Iterable[code_interpreter_execute_params.File] | NotGiven = NOT_GIVEN,
+ session_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExecuteResponse:
+ """Executes the given code snippet and returns the output.
+
+ Without a session_id, a
+ new session will be created to run the code. If you do pass in a valid
+ session_id, the code will be run in that session. This is useful for running
+ multiple code snippets in the same environment, because dependencies and similar
+ things are persisted between calls to the same session.
+
+ Args:
+ code: Code snippet to execute.
+
+ language: Programming language for the code to execute. Currently only supports Python,
+ but more will be added.
+
+ files: Files to upload to the session. If present, files will be uploaded before
+ executing the given code.
+
+ session_id: Identifier of the current session. Used to make follow-up calls. Requests will
+ return an error if the session does not belong to the caller or has expired.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return cast(
+ ExecuteResponse,
+ self._post(
+ "/tci/execute",
+ body=maybe_transform(
+ {
+ "code": code,
+ "language": language,
+ "files": files,
+ "session_id": session_id,
+ },
+ code_interpreter_execute_params.CodeInterpreterExecuteParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=cast(Any, ExecuteResponse), # Union types cannot be passed in as arguments in the type system
+ ),
+ )
+
+
+class AsyncCodeInterpreterResource(AsyncAPIResource):
+ @cached_property
+ def sessions(self) -> AsyncSessionsResource:
+ return AsyncSessionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncCodeInterpreterResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncCodeInterpreterResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCodeInterpreterResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncCodeInterpreterResourceWithStreamingResponse(self)
+
+ async def execute(
+ self,
+ *,
+ code: str,
+ language: Literal["python"],
+ files: Iterable[code_interpreter_execute_params.File] | NotGiven = NOT_GIVEN,
+ session_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExecuteResponse:
+ """Executes the given code snippet and returns the output.
+
+ Without a session_id, a
+ new session will be created to run the code. If you do pass in a valid
+ session_id, the code will be run in that session. This is useful for running
+ multiple code snippets in the same environment, because dependencies and similar
+ things are persisted between calls to the same session.
+
+ Args:
+ code: Code snippet to execute.
+
+ language: Programming language for the code to execute. Currently only supports Python,
+ but more will be added.
+
+ files: Files to upload to the session. If present, files will be uploaded before
+ executing the given code.
+
+ session_id: Identifier of the current session. Used to make follow-up calls. Requests will
+ return an error if the session does not belong to the caller or has expired.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return cast(
+ ExecuteResponse,
+ await self._post(
+ "/tci/execute",
+ body=await async_maybe_transform(
+ {
+ "code": code,
+ "language": language,
+ "files": files,
+ "session_id": session_id,
+ },
+ code_interpreter_execute_params.CodeInterpreterExecuteParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=cast(Any, ExecuteResponse), # Union types cannot be passed in as arguments in the type system
+ ),
+ )
+
+
+class CodeInterpreterResourceWithRawResponse:
+ def __init__(self, code_interpreter: CodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = to_raw_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> SessionsResourceWithRawResponse:
+ return SessionsResourceWithRawResponse(self._code_interpreter.sessions)
+
+
+class AsyncCodeInterpreterResourceWithRawResponse:
+ def __init__(self, code_interpreter: AsyncCodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = async_to_raw_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsResourceWithRawResponse:
+ return AsyncSessionsResourceWithRawResponse(self._code_interpreter.sessions)
+
+
+class CodeInterpreterResourceWithStreamingResponse:
+ def __init__(self, code_interpreter: CodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = to_streamed_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> SessionsResourceWithStreamingResponse:
+ return SessionsResourceWithStreamingResponse(self._code_interpreter.sessions)
+
+
+class AsyncCodeInterpreterResourceWithStreamingResponse:
+ def __init__(self, code_interpreter: AsyncCodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = async_to_streamed_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsResourceWithStreamingResponse:
+ return AsyncSessionsResourceWithStreamingResponse(self._code_interpreter.sessions)
diff --git a/src/together/resources/code_interpreter/sessions.py b/src/together/resources/code_interpreter/sessions.py
new file mode 100644
index 00000000..c4f3a8b0
--- /dev/null
+++ b/src/together/resources/code_interpreter/sessions.py
@@ -0,0 +1,135 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.code_interpreter.session_list_response import SessionListResponse
+
+__all__ = ["SessionsResource", "AsyncSessionsResource"]
+
+
+class SessionsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SessionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return SessionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SessionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return SessionsResourceWithStreamingResponse(self)
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionListResponse:
+ """Lists all your currently active sessions."""
+ return self._get(
+ "/tci/sessions",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionListResponse,
+ )
+
+
+class AsyncSessionsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSessionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSessionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSessionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncSessionsResourceWithStreamingResponse(self)
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionListResponse:
+ """Lists all your currently active sessions."""
+ return await self._get(
+ "/tci/sessions",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionListResponse,
+ )
+
+
+class SessionsResourceWithRawResponse:
+ def __init__(self, sessions: SessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = to_raw_response_wrapper(
+ sessions.list,
+ )
+
+
+class AsyncSessionsResourceWithRawResponse:
+ def __init__(self, sessions: AsyncSessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = async_to_raw_response_wrapper(
+ sessions.list,
+ )
+
+
+class SessionsResourceWithStreamingResponse:
+ def __init__(self, sessions: SessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = to_streamed_response_wrapper(
+ sessions.list,
+ )
+
+
+class AsyncSessionsResourceWithStreamingResponse:
+ def __init__(self, sessions: AsyncSessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = async_to_streamed_response_wrapper(
+ sessions.list,
+ )
diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py
index 9f1a0ba0..d201dcd2 100644
--- a/src/together/resources/completions.py
+++ b/src/together/resources/completions.py
@@ -9,11 +9,7 @@
from ..types import completion_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import required_args, maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -442,7 +438,9 @@ def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -866,7 +864,9 @@ async def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py
index 936cfbc1..36688cb3 100644
--- a/src/together/resources/embeddings.py
+++ b/src/together/resources/embeddings.py
@@ -9,10 +9,7 @@
from ..types import embedding_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py
new file mode 100644
index 00000000..933c1fc5
--- /dev/null
+++ b/src/together/resources/endpoints.py
@@ -0,0 +1,627 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.endpoint_list_response import EndpointListResponse
+from ..types.endpoint_create_response import EndpointCreateResponse
+from ..types.endpoint_update_response import EndpointUpdateResponse
+from ..types.endpoint_retrieve_response import EndpointRetrieveResponse
+
+__all__ = ["EndpointsResource", "AsyncEndpointsResource"]
+
+
+class EndpointsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> EndpointsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return EndpointsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return EndpointsResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ autoscaling: endpoint_create_params.Autoscaling,
+ hardware: str,
+ model: str,
+ disable_prompt_cache: bool | NotGiven = NOT_GIVEN,
+ disable_speculative_decoding: bool | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointCreateResponse:
+ """Creates a new dedicated endpoint for serving models.
+
+ The endpoint will
+ automatically start after creation. You can deploy any supported model on
+ hardware configurations that meet the model's requirements.
+
+ Args:
+ autoscaling: Configuration for automatic scaling of the endpoint
+
+ hardware: The hardware configuration to use for this endpoint
+
+ model: The model to deploy on this endpoint
+
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
+
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
+ timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/endpoints",
+ body=maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "hardware": hardware,
+ "model": model,
+ "disable_prompt_cache": disable_prompt_cache,
+ "disable_speculative_decoding": disable_speculative_decoding,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_create_params.EndpointCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointRetrieveResponse:
+ """
+ Retrieves details about a specific endpoint, including its current state,
+ configuration, and scaling settings.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return self._get(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointRetrieveResponse,
+ )
+
+ def update(
+ self,
+ endpoint_id: str,
+ *,
+ autoscaling: endpoint_update_params.Autoscaling | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointUpdateResponse:
+ """Updates an existing endpoint's configuration.
+
+ You can modify the display name,
+ autoscaling settings, or change the endpoint's state (start/stop).
+
+ Args:
+ autoscaling: New autoscaling configuration for the endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to null or 0 to disable automatic timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return self._patch(
+ f"/endpoints/{endpoint_id}",
+ body=maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_update_params.EndpointUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointUpdateResponse,
+ )
+
+ def list(
+ self,
+ *,
+ type: Literal["dedicated", "serverless"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointListResponse:
+ """Returns a list of all endpoints associated with your account.
+
+ You can filter the
+ results by type (dedicated or serverless).
+
+ Args:
+ type: Filter endpoints by type
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/endpoints",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"type": type}, endpoint_list_params.EndpointListParams),
+ ),
+ cast_to=EndpointListResponse,
+ )
+
+ def delete(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """Permanently deletes an endpoint.
+
+ This action cannot be undone.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._delete(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class AsyncEndpointsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncEndpointsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncEndpointsResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ autoscaling: endpoint_create_params.Autoscaling,
+ hardware: str,
+ model: str,
+ disable_prompt_cache: bool | NotGiven = NOT_GIVEN,
+ disable_speculative_decoding: bool | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointCreateResponse:
+ """Creates a new dedicated endpoint for serving models.
+
+ The endpoint will
+ automatically start after creation. You can deploy any supported model on
+ hardware configurations that meet the model's requirements.
+
+ Args:
+ autoscaling: Configuration for automatic scaling of the endpoint
+
+ hardware: The hardware configuration to use for this endpoint
+
+ model: The model to deploy on this endpoint
+
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
+
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
+ timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/endpoints",
+ body=await async_maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "hardware": hardware,
+ "model": model,
+ "disable_prompt_cache": disable_prompt_cache,
+ "disable_speculative_decoding": disable_speculative_decoding,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_create_params.EndpointCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointRetrieveResponse:
+ """
+ Retrieves details about a specific endpoint, including its current state,
+ configuration, and scaling settings.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return await self._get(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointRetrieveResponse,
+ )
+
+ async def update(
+ self,
+ endpoint_id: str,
+ *,
+ autoscaling: endpoint_update_params.Autoscaling | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointUpdateResponse:
+ """Updates an existing endpoint's configuration.
+
+ You can modify the display name,
+ autoscaling settings, or change the endpoint's state (start/stop).
+
+ Args:
+ autoscaling: New autoscaling configuration for the endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to null or 0 to disable automatic timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return await self._patch(
+ f"/endpoints/{endpoint_id}",
+ body=await async_maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_update_params.EndpointUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointUpdateResponse,
+ )
+
+ async def list(
+ self,
+ *,
+ type: Literal["dedicated", "serverless"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointListResponse:
+ """Returns a list of all endpoints associated with your account.
+
+ You can filter the
+ results by type (dedicated or serverless).
+
+ Args:
+ type: Filter endpoints by type
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._get(
+ "/endpoints",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"type": type}, endpoint_list_params.EndpointListParams),
+ ),
+ cast_to=EndpointListResponse,
+ )
+
+ async def delete(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """Permanently deletes an endpoint.
+
+ This action cannot be undone.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._delete(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class EndpointsResourceWithRawResponse:
+ def __init__(self, endpoints: EndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = to_raw_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = to_raw_response_wrapper(
+ endpoints.update,
+ )
+ self.list = to_raw_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = to_raw_response_wrapper(
+ endpoints.delete,
+ )
+
+
+class AsyncEndpointsResourceWithRawResponse:
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = async_to_raw_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = async_to_raw_response_wrapper(
+ endpoints.update,
+ )
+ self.list = async_to_raw_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = async_to_raw_response_wrapper(
+ endpoints.delete,
+ )
+
+
+class EndpointsResourceWithStreamingResponse:
+ def __init__(self, endpoints: EndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = to_streamed_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ endpoints.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ endpoints.delete,
+ )
+
+
+class AsyncEndpointsResourceWithStreamingResponse:
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = async_to_streamed_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ endpoints.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ endpoints.delete,
+ )
diff --git a/src/together/resources/fine_tune.py b/src/together/resources/fine_tune.py
index b3b62585..84619f78 100644
--- a/src/together/resources/fine_tune.py
+++ b/src/together/resources/fine_tune.py
@@ -9,10 +9,7 @@
from ..types import fine_tune_create_params, fine_tune_download_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -55,7 +52,8 @@ def create(
*,
model: str,
training_file: str,
- batch_size: int | NotGiven = NOT_GIVEN,
+ batch_size: Union[int, Literal["max"]] | NotGiven = NOT_GIVEN,
+ from_checkpoint: str | NotGiven = NOT_GIVEN,
learning_rate: float | NotGiven = NOT_GIVEN,
lr_scheduler: fine_tune_create_params.LrScheduler | NotGiven = NOT_GIVEN,
max_grad_norm: float | NotGiven = NOT_GIVEN,
@@ -64,6 +62,7 @@ def create(
n_evals: int | NotGiven = NOT_GIVEN,
suffix: str | NotGiven = NOT_GIVEN,
train_on_inputs: Union[bool, Literal["auto"]] | NotGiven = NOT_GIVEN,
+ training_method: fine_tune_create_params.TrainingMethod | NotGiven = NOT_GIVEN,
training_type: fine_tune_create_params.TrainingType | NotGiven = NOT_GIVEN,
validation_file: str | NotGiven = NOT_GIVEN,
wandb_api_key: str | NotGiven = NOT_GIVEN,
@@ -88,11 +87,20 @@ def create(
training_file: File-ID of a training file uploaded to the Together API
batch_size: Number of training examples processed together (larger batches use more memory
- but may train faster)
+ but may train faster). Defaults to "max". We use training optimizations like
+ packing, so the effective batch size may be different than the value you set.
+
+ from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job.
+ Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
+ `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the
+ final checkpoint will be used.
learning_rate: Controls how quickly the model adapts to new information (too high may cause
instability, too low may slow convergence)
+ lr_scheduler: The learning rate scheduler to use. It specifies how the learning rate is
+ adjusted during training.
+
max_grad_norm: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
n_checkpoints: Number of intermediate model versions saved during training for evaluation
@@ -107,6 +115,9 @@ def create(
train_on_inputs: Whether to mask the user messages in conversational data or prompts in
instruction data.
+ training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct
+ Preference Optimization.
+
validation_file: File-ID of a validation file uploaded to the Together API
wandb_api_key: Integration key for tracking experiments and model metrics on W&B platform
@@ -121,7 +132,7 @@ def create(
warmup_ratio: The percent of steps at the start of training to linearly increase the learning
rate.
- weight_decay: Weight decay
+ weight_decay: Weight decay. Regularization parameter for the optimizer.
extra_headers: Send extra headers
@@ -138,6 +149,7 @@ def create(
"model": model,
"training_file": training_file,
"batch_size": batch_size,
+ "from_checkpoint": from_checkpoint,
"learning_rate": learning_rate,
"lr_scheduler": lr_scheduler,
"max_grad_norm": max_grad_norm,
@@ -146,6 +158,7 @@ def create(
"n_evals": n_evals,
"suffix": suffix,
"train_on_inputs": train_on_inputs,
+ "training_method": training_method,
"training_type": training_type,
"validation_file": validation_file,
"wandb_api_key": wandb_api_key,
@@ -364,7 +377,8 @@ async def create(
*,
model: str,
training_file: str,
- batch_size: int | NotGiven = NOT_GIVEN,
+ batch_size: Union[int, Literal["max"]] | NotGiven = NOT_GIVEN,
+ from_checkpoint: str | NotGiven = NOT_GIVEN,
learning_rate: float | NotGiven = NOT_GIVEN,
lr_scheduler: fine_tune_create_params.LrScheduler | NotGiven = NOT_GIVEN,
max_grad_norm: float | NotGiven = NOT_GIVEN,
@@ -373,6 +387,7 @@ async def create(
n_evals: int | NotGiven = NOT_GIVEN,
suffix: str | NotGiven = NOT_GIVEN,
train_on_inputs: Union[bool, Literal["auto"]] | NotGiven = NOT_GIVEN,
+ training_method: fine_tune_create_params.TrainingMethod | NotGiven = NOT_GIVEN,
training_type: fine_tune_create_params.TrainingType | NotGiven = NOT_GIVEN,
validation_file: str | NotGiven = NOT_GIVEN,
wandb_api_key: str | NotGiven = NOT_GIVEN,
@@ -397,11 +412,20 @@ async def create(
training_file: File-ID of a training file uploaded to the Together API
batch_size: Number of training examples processed together (larger batches use more memory
- but may train faster)
+ but may train faster). Defaults to "max". We use training optimizations like
+ packing, so the effective batch size may be different from the value you set.
+
+ from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job.
+ Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
+ `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the
+ final checkpoint will be used.
learning_rate: Controls how quickly the model adapts to new information (too high may cause
instability, too low may slow convergence)
+ lr_scheduler: The learning rate scheduler to use. It specifies how the learning rate is
+ adjusted during training.
+
max_grad_norm: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
n_checkpoints: Number of intermediate model versions saved during training for evaluation
@@ -416,6 +440,9 @@ async def create(
train_on_inputs: Whether to mask the user messages in conversational data or prompts in
instruction data.
+ training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct
+ Preference Optimization.
+
validation_file: File-ID of a validation file uploaded to the Together API
wandb_api_key: Integration key for tracking experiments and model metrics on W&B platform
@@ -430,7 +457,7 @@ async def create(
warmup_ratio: The percent of steps at the start of training to linearly increase the learning
rate.
- weight_decay: Weight decay
+ weight_decay: Weight decay. Regularization parameter for the optimizer.
extra_headers: Send extra headers
@@ -447,6 +474,7 @@ async def create(
"model": model,
"training_file": training_file,
"batch_size": batch_size,
+ "from_checkpoint": from_checkpoint,
"learning_rate": learning_rate,
"lr_scheduler": lr_scheduler,
"max_grad_norm": max_grad_norm,
@@ -455,6 +483,7 @@ async def create(
"n_evals": n_evals,
"suffix": suffix,
"train_on_inputs": train_on_inputs,
+ "training_method": training_method,
"training_type": training_type,
"validation_file": validation_file,
"wandb_api_key": wandb_api_key,
diff --git a/src/together/resources/hardware.py b/src/together/resources/hardware.py
new file mode 100644
index 00000000..302e6303
--- /dev/null
+++ b/src/together/resources/hardware.py
@@ -0,0 +1,181 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..types import hardware_list_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.hardware_list_response import HardwareListResponse
+
+__all__ = ["HardwareResource", "AsyncHardwareResource"]
+
+
+class HardwareResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> HardwareResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return HardwareResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> HardwareResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return HardwareResourceWithStreamingResponse(self)
+
+ def list(
+ self,
+ *,
+ model: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> HardwareListResponse:
+ """Returns a list of available hardware configurations for deploying models.
+
+ When a
+ model parameter is provided, it returns only hardware configurations compatible
+ with that model, including their current availability status.
+
+ Args:
+ model: Filter hardware configurations by model compatibility. When provided, the
+ response includes availability status for each compatible configuration.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/hardware",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"model": model}, hardware_list_params.HardwareListParams),
+ ),
+ cast_to=HardwareListResponse,
+ )
+
+
+class AsyncHardwareResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncHardwareResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncHardwareResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncHardwareResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncHardwareResourceWithStreamingResponse(self)
+
+ async def list(
+ self,
+ *,
+ model: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> HardwareListResponse:
+ """Returns a list of available hardware configurations for deploying models.
+
+ When a
+ model parameter is provided, it returns only hardware configurations compatible
+ with that model, including their current availability status.
+
+ Args:
+ model: Filter hardware configurations by model compatibility. When provided, the
+ response includes availability status for each compatible configuration.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._get(
+ "/hardware",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"model": model}, hardware_list_params.HardwareListParams),
+ ),
+ cast_to=HardwareListResponse,
+ )
+
+
+class HardwareResourceWithRawResponse:
+ def __init__(self, hardware: HardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = to_raw_response_wrapper(
+ hardware.list,
+ )
+
+
+class AsyncHardwareResourceWithRawResponse:
+ def __init__(self, hardware: AsyncHardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = async_to_raw_response_wrapper(
+ hardware.list,
+ )
+
+
+class HardwareResourceWithStreamingResponse:
+ def __init__(self, hardware: HardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = to_streamed_response_wrapper(
+ hardware.list,
+ )
+
+
+class AsyncHardwareResourceWithStreamingResponse:
+ def __init__(self, hardware: AsyncHardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = async_to_streamed_response_wrapper(
+ hardware.list,
+ )
diff --git a/src/together/resources/images.py b/src/together/resources/images.py
index c92542df..30526e1d 100644
--- a/src/together/resources/images.py
+++ b/src/together/resources/images.py
@@ -9,10 +9,7 @@
from ..types import image_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/together/resources/jobs.py b/src/together/resources/jobs.py
new file mode 100644
index 00000000..2eae45ab
--- /dev/null
+++ b/src/together/resources/jobs.py
@@ -0,0 +1,214 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.job_list_response import JobListResponse
+from ..types.job_retrieve_response import JobRetrieveResponse
+
+__all__ = ["JobsResource", "AsyncJobsResource"]
+
+
+class JobsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> JobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return JobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> JobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return JobsResourceWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ job_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobRetrieveResponse:
+ """
+ Get the status of a specific job
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return self._get(
+ f"/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobRetrieveResponse,
+ )
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobListResponse:
+ """List all jobs and their statuses"""
+ return self._get(
+ "/jobs",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobListResponse,
+ )
+
+
+class AsyncJobsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncJobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncJobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncJobsResourceWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ job_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobRetrieveResponse:
+ """
+ Get the status of a specific job
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return await self._get(
+ f"/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobRetrieveResponse,
+ )
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobListResponse:
+ """List all jobs and their statuses"""
+ return await self._get(
+ "/jobs",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobListResponse,
+ )
+
+
+class JobsResourceWithRawResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ jobs.list,
+ )
+
+
+class AsyncJobsResourceWithRawResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = async_to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ jobs.list,
+ )
+
+
+class JobsResourceWithStreamingResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ jobs.list,
+ )
+
+
+class AsyncJobsResourceWithStreamingResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ jobs.list,
+ )
diff --git a/src/together/resources/models.py b/src/together/resources/models.py
index 1432e2c0..b4a9cc73 100644
--- a/src/together/resources/models.py
+++ b/src/together/resources/models.py
@@ -2,9 +2,13 @@
from __future__ import annotations
+from typing_extensions import Literal
+
import httpx
+from ..types import model_upload_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -15,6 +19,7 @@
)
from .._base_client import make_request_options
from ..types.model_list_response import ModelListResponse
+from ..types.model_upload_response import ModelUploadResponse
__all__ = ["ModelsResource", "AsyncModelsResource"]
@@ -58,6 +63,71 @@ def list(
cast_to=ModelListResponse,
)
+ def upload(
+ self,
+ *,
+ model_name: str,
+ model_source: str,
+ base_model: str | NotGiven = NOT_GIVEN,
+ description: str | NotGiven = NOT_GIVEN,
+ hf_token: str | NotGiven = NOT_GIVEN,
+ lora_model: str | NotGiven = NOT_GIVEN,
+ model_type: Literal["model", "adapter"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ModelUploadResponse:
+ """
+ Upload a custom model or adapter from Hugging Face or S3
+
+ Args:
+ model_name: The name to give to your uploaded model
+
+ model_source: The source location of the model (Hugging Face repo or S3 path)
+
+ base_model: The base model to use for an adapter if setting it to run against a serverless
+ pool. Only used for model_type `adapter`.
+
+ description: A description of your model
+
+ hf_token: Hugging Face token (if uploading from Hugging Face)
+
+ lora_model: The lora pool to use for an adapter if setting it to run against, say, a
+ dedicated pool. Only used for model_type `adapter`.
+
+ model_type: Whether the model is a full model or an adapter
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/models",
+ body=maybe_transform(
+ {
+ "model_name": model_name,
+ "model_source": model_source,
+ "base_model": base_model,
+ "description": description,
+ "hf_token": hf_token,
+ "lora_model": lora_model,
+ "model_type": model_type,
+ },
+ model_upload_params.ModelUploadParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelUploadResponse,
+ )
+
class AsyncModelsResource(AsyncAPIResource):
@cached_property
@@ -98,6 +168,71 @@ async def list(
cast_to=ModelListResponse,
)
+ async def upload(
+ self,
+ *,
+ model_name: str,
+ model_source: str,
+ base_model: str | NotGiven = NOT_GIVEN,
+ description: str | NotGiven = NOT_GIVEN,
+ hf_token: str | NotGiven = NOT_GIVEN,
+ lora_model: str | NotGiven = NOT_GIVEN,
+ model_type: Literal["model", "adapter"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ModelUploadResponse:
+ """
+ Upload a custom model or adapter from Hugging Face or S3
+
+ Args:
+ model_name: The name to give to your uploaded model
+
+ model_source: The source location of the model (Hugging Face repo or S3 path)
+
+ base_model: The base model to use for an adapter if setting it to run against a serverless
+ pool. Only used for model_type `adapter`.
+
+ description: A description of your model
+
+ hf_token: Hugging Face token (if uploading from Hugging Face)
+
+ lora_model: The lora pool to use for an adapter if setting it to run against, say, a
+ dedicated pool. Only used for model_type `adapter`.
+
+ model_type: Whether the model is a full model or an adapter
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/models",
+ body=await async_maybe_transform(
+ {
+ "model_name": model_name,
+ "model_source": model_source,
+ "base_model": base_model,
+ "description": description,
+ "hf_token": hf_token,
+ "lora_model": lora_model,
+ "model_type": model_type,
+ },
+ model_upload_params.ModelUploadParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelUploadResponse,
+ )
+
class ModelsResourceWithRawResponse:
def __init__(self, models: ModelsResource) -> None:
@@ -106,6 +241,9 @@ def __init__(self, models: ModelsResource) -> None:
self.list = to_raw_response_wrapper(
models.list,
)
+ self.upload = to_raw_response_wrapper(
+ models.upload,
+ )
class AsyncModelsResourceWithRawResponse:
@@ -115,6 +253,9 @@ def __init__(self, models: AsyncModelsResource) -> None:
self.list = async_to_raw_response_wrapper(
models.list,
)
+ self.upload = async_to_raw_response_wrapper(
+ models.upload,
+ )
class ModelsResourceWithStreamingResponse:
@@ -124,6 +265,9 @@ def __init__(self, models: ModelsResource) -> None:
self.list = to_streamed_response_wrapper(
models.list,
)
+ self.upload = to_streamed_response_wrapper(
+ models.upload,
+ )
class AsyncModelsResourceWithStreamingResponse:
@@ -133,3 +277,6 @@ def __init__(self, models: AsyncModelsResource) -> None:
self.list = async_to_streamed_response_wrapper(
models.list,
)
+ self.upload = async_to_streamed_response_wrapper(
+ models.upload,
+ )
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
index 5d067f82..d3096869 100644
--- a/src/together/types/__init__.py
+++ b/src/together/types/__init__.py
@@ -11,17 +11,32 @@
from .tools_param import ToolsParam as ToolsParam
from .fine_tune_event import FineTuneEvent as FineTuneEvent
from .rerank_response import RerankResponse as RerankResponse
+from .execute_response import ExecuteResponse as ExecuteResponse
+from .job_list_response import JobListResponse as JobListResponse
from .tool_choice_param import ToolChoiceParam as ToolChoiceParam
from .file_list_response import FileListResponse as FileListResponse
from .audio_create_params import AudioCreateParams as AudioCreateParams
from .image_create_params import ImageCreateParams as ImageCreateParams
from .model_list_response import ModelListResponse as ModelListResponse
+from .model_upload_params import ModelUploadParams as ModelUploadParams
from .client_rerank_params import ClientRerankParams as ClientRerankParams
+from .endpoint_list_params import EndpointListParams as EndpointListParams
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
+from .hardware_list_params import HardwareListParams as HardwareListParams
+from .job_retrieve_response import JobRetrieveResponse as JobRetrieveResponse
+from .model_upload_response import ModelUploadResponse as ModelUploadResponse
+from .endpoint_create_params import EndpointCreateParams as EndpointCreateParams
+from .endpoint_list_response import EndpointListResponse as EndpointListResponse
+from .endpoint_update_params import EndpointUpdateParams as EndpointUpdateParams
from .file_retrieve_response import FileRetrieveResponse as FileRetrieveResponse
+from .hardware_list_response import HardwareListResponse as HardwareListResponse
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
from .fine_tune_create_params import FineTuneCreateParams as FineTuneCreateParams
from .fine_tune_list_response import FineTuneListResponse as FineTuneListResponse
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
+from .endpoint_create_response import EndpointCreateResponse as EndpointCreateResponse
+from .endpoint_update_response import EndpointUpdateResponse as EndpointUpdateResponse
from .fine_tune_download_params import FineTuneDownloadParams as FineTuneDownloadParams
+from .endpoint_retrieve_response import EndpointRetrieveResponse as EndpointRetrieveResponse
from .fine_tune_download_response import FineTuneDownloadResponse as FineTuneDownloadResponse
+from .code_interpreter_execute_params import CodeInterpreterExecuteParams as CodeInterpreterExecuteParams
diff --git a/src/together/types/chat/chat_completion_structured_message_image_url_param.py b/src/together/types/chat/chat_completion_structured_message_image_url_param.py
index 1a1f5bfc..25d737ca 100644
--- a/src/together/types/chat/chat_completion_structured_message_image_url_param.py
+++ b/src/together/types/chat/chat_completion_structured_message_image_url_param.py
@@ -9,10 +9,10 @@
class ImageURL(TypedDict, total=False):
url: Required[str]
- """The URL of the image as a plain string."""
+ """The URL of the image"""
class ChatCompletionStructuredMessageImageURLParam(TypedDict, total=False):
- image_url: Required[ImageURL]
+ image_url: ImageURL
- type: Required[Literal["image_url"]]
+ type: Literal["image_url"]
diff --git a/src/together/types/chat/chat_completion_usage.py b/src/together/types/chat/chat_completion_usage.py
index 82b9d450..510233f9 100644
--- a/src/together/types/chat/chat_completion_usage.py
+++ b/src/together/types/chat/chat_completion_usage.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..._models import BaseModel
__all__ = ["ChatCompletionUsage"]
diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py
index d6749ac7..be7d65c0 100644
--- a/src/together/types/chat/completion_create_params.py
+++ b/src/together/types/chat/completion_create_params.py
@@ -14,6 +14,8 @@
"CompletionCreateParamsBase",
"Message",
"MessageContentUnionMember1",
+ "MessageContentUnionMember1Video",
+ "MessageContentUnionMember1VideoVideoURL",
"FunctionCall",
"FunctionCallName",
"ResponseFormat",
@@ -157,8 +159,21 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
+class MessageContentUnionMember1VideoVideoURL(TypedDict, total=False):
+ url: Required[str]
+ """The URL of the video"""
+
+
+class MessageContentUnionMember1Video(TypedDict, total=False):
+ type: Required[Literal["video_url"]]
+
+ video_url: Required[MessageContentUnionMember1VideoVideoURL]
+
+
MessageContentUnionMember1: TypeAlias = Union[
- ChatCompletionStructuredMessageTextParam, ChatCompletionStructuredMessageImageURLParam
+ ChatCompletionStructuredMessageTextParam,
+ ChatCompletionStructuredMessageImageURLParam,
+ MessageContentUnionMember1Video,
]
@@ -170,7 +185,10 @@ class Message(TypedDict, total=False):
"""
role: Required[Literal["system", "user", "assistant", "tool"]]
- """The role of the messages author. Choice between: system, user, or assistant."""
+ """The role of the message's author.
+
+ Choice between: system, user, assistant, or tool.
+ """
class FunctionCallName(TypedDict, total=False):
@@ -181,7 +199,7 @@ class FunctionCallName(TypedDict, total=False):
class ResponseFormat(TypedDict, total=False):
- schema: Dict[str, str]
+ schema: Dict[str, object]
"""The schema of the response format."""
type: str
diff --git a/src/together/types/code_interpreter/__init__.py b/src/together/types/code_interpreter/__init__.py
new file mode 100644
index 00000000..82331854
--- /dev/null
+++ b/src/together/types/code_interpreter/__init__.py
@@ -0,0 +1,5 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .session_list_response import SessionListResponse as SessionListResponse
diff --git a/src/together/types/code_interpreter/session_list_response.py b/src/together/types/code_interpreter/session_list_response.py
new file mode 100644
index 00000000..f4379c0d
--- /dev/null
+++ b/src/together/types/code_interpreter/session_list_response.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from datetime import datetime
+
+from ..._models import BaseModel
+
+__all__ = ["SessionListResponse", "Data", "DataSession"]
+
+
+class DataSession(BaseModel):
+ id: str
+ """Session Identifier. Used to make follow-up calls."""
+
+ execute_count: int
+
+ expires_at: datetime
+
+ last_execute_at: datetime
+
+ started_at: datetime
+
+
+class Data(BaseModel):
+ sessions: List[DataSession]
+
+
+class SessionListResponse(BaseModel):
+ data: Optional[Data] = None
+
+ errors: Optional[List[Union[str, Dict[str, object]]]] = None
diff --git a/src/together/types/code_interpreter_execute_params.py b/src/together/types/code_interpreter_execute_params.py
new file mode 100644
index 00000000..91cf6c02
--- /dev/null
+++ b/src/together/types/code_interpreter_execute_params.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CodeInterpreterExecuteParams", "File"]
+
+
+class CodeInterpreterExecuteParams(TypedDict, total=False):
+ code: Required[str]
+ """Code snippet to execute."""
+
+ language: Required[Literal["python"]]
+ """Programming language for the code to execute.
+
+ Currently only supports Python, but more will be added.
+ """
+
+ files: Iterable[File]
+ """Files to upload to the session.
+
+ If present, files will be uploaded before executing the given code.
+ """
+
+ session_id: str
+ """Identifier of the current session.
+
+ Used to make follow-up calls. Requests will return an error if the session does
+ not belong to the caller or has expired.
+ """
+
+
+class File(TypedDict, total=False):
+ content: Required[str]
+
+ encoding: Required[Literal["string", "base64"]]
+ """Encoding of the file content.
+
+ Use `string` for text files such as code, and `base64` for binary files, such as
+ images.
+ """
+
+ name: Required[str]
diff --git a/src/together/types/endpoint_create_params.py b/src/together/types/endpoint_create_params.py
new file mode 100644
index 00000000..0e41f9cd
--- /dev/null
+++ b/src/together/types/endpoint_create_params.py
@@ -0,0 +1,46 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EndpointCreateParams", "Autoscaling"]
+
+
+class EndpointCreateParams(TypedDict, total=False):
+ autoscaling: Required[Autoscaling]
+ """Configuration for automatic scaling of the endpoint"""
+
+ hardware: Required[str]
+ """The hardware configuration to use for this endpoint"""
+
+ model: Required[str]
+ """The model to deploy on this endpoint"""
+
+ disable_prompt_cache: bool
+ """Whether to disable the prompt cache for this endpoint"""
+
+ disable_speculative_decoding: bool
+ """Whether to disable speculative decoding for this endpoint"""
+
+ display_name: str
+ """A human-readable name for the endpoint"""
+
+ inactive_timeout: Optional[int]
+ """
+ The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
+ timeout.
+ """
+
+ state: Literal["STARTED", "STOPPED"]
+ """The desired state of the endpoint"""
+
+
+class Autoscaling(TypedDict, total=False):
+ max_replicas: Required[int]
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: Required[int]
+ """The minimum number of replicas to maintain, even when there is no load"""
diff --git a/src/together/types/endpoint_create_response.py b/src/together/types/endpoint_create_response.py
new file mode 100644
index 00000000..2421d2f3
--- /dev/null
+++ b/src/together/types/endpoint_create_response.py
@@ -0,0 +1,51 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointCreateResponse", "Autoscaling"]
+
+
+class Autoscaling(BaseModel):
+ max_replicas: int
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: int
+ """The minimum number of replicas to maintain, even when there is no load"""
+
+
+class EndpointCreateResponse(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ autoscaling: Autoscaling
+ """Configuration for automatic scaling of the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ display_name: str
+ """Human-readable name for the endpoint"""
+
+ hardware: str
+ """The hardware configuration used for this endpoint"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["dedicated"]
+ """The type of endpoint"""
diff --git a/src/together/types/endpoint_list_params.py b/src/together/types/endpoint_list_params.py
new file mode 100644
index 00000000..5123d49d
--- /dev/null
+++ b/src/together/types/endpoint_list_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["EndpointListParams"]
+
+
+class EndpointListParams(TypedDict, total=False):
+ type: Literal["dedicated", "serverless"]
+ """Filter endpoints by type"""
diff --git a/src/together/types/endpoint_list_response.py b/src/together/types/endpoint_list_response.py
new file mode 100644
index 00000000..009764a7
--- /dev/null
+++ b/src/together/types/endpoint_list_response.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointListResponse", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["serverless", "dedicated"]
+ """The type of endpoint"""
+
+
+class EndpointListResponse(BaseModel):
+ data: List[Data]
+
+ object: Literal["list"]
diff --git a/src/together/types/endpoint_retrieve_response.py b/src/together/types/endpoint_retrieve_response.py
new file mode 100644
index 00000000..cb471adc
--- /dev/null
+++ b/src/together/types/endpoint_retrieve_response.py
@@ -0,0 +1,51 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointRetrieveResponse", "Autoscaling"]
+
+
+class Autoscaling(BaseModel):
+ max_replicas: int
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: int
+ """The minimum number of replicas to maintain, even when there is no load"""
+
+
+class EndpointRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ autoscaling: Autoscaling
+ """Configuration for automatic scaling of the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ display_name: str
+ """Human-readable name for the endpoint"""
+
+ hardware: str
+ """The hardware configuration used for this endpoint"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["dedicated"]
+ """The type of endpoint"""
diff --git a/src/together/types/endpoint_update_params.py b/src/together/types/endpoint_update_params.py
new file mode 100644
index 00000000..85ec7527
--- /dev/null
+++ b/src/together/types/endpoint_update_params.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EndpointUpdateParams", "Autoscaling"]
+
+
+class EndpointUpdateParams(TypedDict, total=False):
+ autoscaling: Autoscaling
+ """New autoscaling configuration for the endpoint"""
+
+ display_name: str
+ """A human-readable name for the endpoint"""
+
+ inactive_timeout: Optional[int]
+ """
+ The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to 0 to disable automatic timeout.
+ """
+
+ state: Literal["STARTED", "STOPPED"]
+ """The desired state of the endpoint"""
+
+
+class Autoscaling(TypedDict, total=False):
+ max_replicas: Required[int]
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: Required[int]
+ """The minimum number of replicas to maintain, even when there is no load"""
diff --git a/src/together/types/endpoint_update_response.py b/src/together/types/endpoint_update_response.py
new file mode 100644
index 00000000..24cb6487
--- /dev/null
+++ b/src/together/types/endpoint_update_response.py
@@ -0,0 +1,51 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointUpdateResponse", "Autoscaling"]
+
+
+class Autoscaling(BaseModel):
+ max_replicas: int
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: int
+ """The minimum number of replicas to maintain, even when there is no load"""
+
+
+class EndpointUpdateResponse(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ autoscaling: Autoscaling
+ """Configuration for automatic scaling of the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ display_name: str
+ """Human-readable name for the endpoint"""
+
+ hardware: str
+ """The hardware configuration used for this endpoint"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["dedicated"]
+ """The type of endpoint"""
diff --git a/src/together/types/execute_response.py b/src/together/types/execute_response.py
new file mode 100644
index 00000000..9234b72c
--- /dev/null
+++ b/src/together/types/execute_response.py
@@ -0,0 +1,105 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+
+__all__ = [
+ "ExecuteResponse",
+ "SuccessfulExecution",
+ "SuccessfulExecutionData",
+ "SuccessfulExecutionDataOutput",
+ "SuccessfulExecutionDataOutputStreamOutput",
+ "SuccessfulExecutionDataOutputError",
+ "SuccessfulExecutionDataOutputDisplayorExecuteOutput",
+ "SuccessfulExecutionDataOutputDisplayorExecuteOutputData",
+ "FailedExecution",
+]
+
+
+class SuccessfulExecutionDataOutputStreamOutput(BaseModel):
+ data: str
+
+ type: Literal["stdout", "stderr"]
+
+
+class SuccessfulExecutionDataOutputError(BaseModel):
+ data: str
+
+ type: Literal["error"]
+
+
+class SuccessfulExecutionDataOutputDisplayorExecuteOutputData(BaseModel):
+ application_geo_json: Optional[Dict[str, object]] = FieldInfo(alias="application/geo+json", default=None)
+
+ application_javascript: Optional[str] = FieldInfo(alias="application/javascript", default=None)
+
+ application_json: Optional[Dict[str, object]] = FieldInfo(alias="application/json", default=None)
+
+ application_pdf: Optional[str] = FieldInfo(alias="application/pdf", default=None)
+
+ application_vnd_vega_v5_json: Optional[Dict[str, object]] = FieldInfo(
+ alias="application/vnd.vega.v5+json", default=None
+ )
+
+ application_vnd_vegalite_v4_json: Optional[Dict[str, object]] = FieldInfo(
+ alias="application/vnd.vegalite.v4+json", default=None
+ )
+
+ image_gif: Optional[str] = FieldInfo(alias="image/gif", default=None)
+
+ image_jpeg: Optional[str] = FieldInfo(alias="image/jpeg", default=None)
+
+ image_png: Optional[str] = FieldInfo(alias="image/png", default=None)
+
+ image_svg_xml: Optional[str] = FieldInfo(alias="image/svg+xml", default=None)
+
+ text_html: Optional[str] = FieldInfo(alias="text/html", default=None)
+
+ text_latex: Optional[str] = FieldInfo(alias="text/latex", default=None)
+
+ text_markdown: Optional[str] = FieldInfo(alias="text/markdown", default=None)
+
+ text_plain: Optional[str] = FieldInfo(alias="text/plain", default=None)
+
+
+class SuccessfulExecutionDataOutputDisplayorExecuteOutput(BaseModel):
+ data: SuccessfulExecutionDataOutputDisplayorExecuteOutputData
+
+ type: Literal["display_data", "execute_result"]
+
+
+SuccessfulExecutionDataOutput: TypeAlias = Annotated[
+ Union[
+ SuccessfulExecutionDataOutputStreamOutput,
+ SuccessfulExecutionDataOutputError,
+ SuccessfulExecutionDataOutputDisplayorExecuteOutput,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class SuccessfulExecutionData(BaseModel):
+ outputs: List[SuccessfulExecutionDataOutput]
+
+ session_id: str
+ """Identifier of the current session. Used to make follow-up calls."""
+
+
+class SuccessfulExecution(BaseModel):
+ data: SuccessfulExecutionData
+
+ errors: None = None
+
+
+class FailedExecution(BaseModel):
+ data: None = None
+
+ errors: List[Union[str, Dict[str, object]]]
+
+
+ExecuteResponse: TypeAlias = Union[SuccessfulExecution, FailedExecution]
diff --git a/src/together/types/fine_tune.py b/src/together/types/fine_tune.py
index ca97fe66..bf873be0 100644
--- a/src/together/types/fine_tune.py
+++ b/src/together/types/fine_tune.py
@@ -12,6 +12,11 @@
"Event",
"LrScheduler",
"LrSchedulerLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs",
+ "TrainingMethod",
+ "TrainingMethodTrainingMethodSft",
+ "TrainingMethodTrainingMethodDpo",
"TrainingType",
"TrainingTypeFullTrainingType",
"TrainingTypeLoRaTrainingType",
@@ -74,17 +79,43 @@ class Event(BaseModel):
level: Optional[Literal["info", "warning", "error", "legacy_info", "legacy_iwarning", "legacy_ierror"]] = None
-class LrSchedulerLrSchedulerArgs(BaseModel):
+class LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs(BaseModel):
min_lr_ratio: Optional[float] = None
"""The ratio of the final learning rate to the peak learning rate"""
+class LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs(BaseModel):
+ min_lr_ratio: Optional[float] = None
+ """The ratio of the final learning rate to the peak learning rate"""
+
+ num_cycles: Optional[float] = None
+ """Number or fraction of cycles for the cosine learning rate scheduler"""
+
+
+LrSchedulerLrSchedulerArgs: TypeAlias = Union[
+ LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs, LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs
+]
+
+
class LrScheduler(BaseModel):
- lr_scheduler_type: str
+ lr_scheduler_type: Literal["linear", "cosine"]
lr_scheduler_args: Optional[LrSchedulerLrSchedulerArgs] = None
+class TrainingMethodTrainingMethodSft(BaseModel):
+ method: Literal["sft"]
+
+
+class TrainingMethodTrainingMethodDpo(BaseModel):
+ method: Literal["dpo"]
+
+ dpo_beta: Optional[float] = None
+
+
+TrainingMethod: TypeAlias = Union[TrainingMethodTrainingMethodSft, TrainingMethodTrainingMethodDpo]
+
+
class TrainingTypeFullTrainingType(BaseModel):
type: Literal["Full"]
@@ -119,7 +150,7 @@ class FineTune(BaseModel):
"completed",
]
- batch_size: Optional[int] = None
+ batch_size: Union[int, Literal["max"], None] = None
created_at: Optional[str] = None
@@ -129,6 +160,8 @@ class FineTune(BaseModel):
events: Optional[List[Event]] = None
+ from_checkpoint: Optional[str] = None
+
job_id: Optional[str] = None
learning_rate: Optional[float] = None
@@ -161,6 +194,8 @@ class FineTune(BaseModel):
training_file: Optional[str] = None
+ training_method: Optional[TrainingMethod] = None
+
training_type: Optional[TrainingType] = None
trainingfile_numlines: Optional[int] = None
diff --git a/src/together/types/fine_tune_create_params.py b/src/together/types/fine_tune_create_params.py
index 1ace4f8d..3291e5a5 100644
--- a/src/together/types/fine_tune_create_params.py
+++ b/src/together/types/fine_tune_create_params.py
@@ -9,6 +9,11 @@
"FineTuneCreateParams",
"LrScheduler",
"LrSchedulerLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs",
+ "TrainingMethod",
+ "TrainingMethodTrainingMethodSft",
+ "TrainingMethodTrainingMethodDpo",
"TrainingType",
"TrainingTypeFullTrainingType",
"TrainingTypeLoRaTrainingType",
@@ -22,10 +27,19 @@ class FineTuneCreateParams(TypedDict, total=False):
training_file: Required[str]
"""File-ID of a training file uploaded to the Together API"""
- batch_size: int
+ batch_size: Union[int, Literal["max"]]
"""
Number of training examples processed together (larger batches use more memory
- but may train faster)
+ but may train faster). Defaults to "max". We use training optimizations like
+ packing, so the effective batch size may be different than the value you set.
+ """
+
+ from_checkpoint: str
+ """The checkpoint identifier to continue training from a previous fine-tuning job.
+
+ Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
+ `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the
+ final checkpoint will be used.
"""
learning_rate: float
@@ -35,6 +49,10 @@ class FineTuneCreateParams(TypedDict, total=False):
"""
lr_scheduler: LrScheduler
+ """The learning rate scheduler to use.
+
+ It specifies how the learning rate is adjusted during training.
+ """
max_grad_norm: float
"""Max gradient norm to be used for gradient clipping. Set to 0 to disable."""
@@ -60,6 +78,12 @@ class FineTuneCreateParams(TypedDict, total=False):
instruction data.
"""
+ training_method: TrainingMethod
+ """The training method to use.
+
+ 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
+ """
+
training_type: TrainingType
validation_file: str
@@ -87,20 +111,46 @@ class FineTuneCreateParams(TypedDict, total=False):
"""
weight_decay: float
- """Weight decay"""
+ """Weight decay. Regularization parameter for the optimizer."""
+
+
+class LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs(TypedDict, total=False):
+ min_lr_ratio: float
+ """The ratio of the final learning rate to the peak learning rate"""
-class LrSchedulerLrSchedulerArgs(TypedDict, total=False):
+class LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs(TypedDict, total=False):
min_lr_ratio: float
"""The ratio of the final learning rate to the peak learning rate"""
+ num_cycles: float
+ """Number or fraction of cycles for the cosine learning rate scheduler"""
+
+
+LrSchedulerLrSchedulerArgs: TypeAlias = Union[
+ LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs, LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs
+]
+
class LrScheduler(TypedDict, total=False):
- lr_scheduler_type: Required[str]
+ lr_scheduler_type: Required[Literal["linear", "cosine"]]
lr_scheduler_args: LrSchedulerLrSchedulerArgs
+class TrainingMethodTrainingMethodSft(TypedDict, total=False):
+ method: Required[Literal["sft"]]
+
+
+class TrainingMethodTrainingMethodDpo(TypedDict, total=False):
+ method: Required[Literal["dpo"]]
+
+ dpo_beta: float
+
+
+TrainingMethod: TypeAlias = Union[TrainingMethodTrainingMethodSft, TrainingMethodTrainingMethodDpo]
+
+
class TrainingTypeFullTrainingType(TypedDict, total=False):
type: Required[Literal["Full"]]
diff --git a/src/together/types/fine_tune_download_response.py b/src/together/types/fine_tune_download_response.py
index 055c235e..a5f5953b 100644
--- a/src/together/types/fine_tune_download_response.py
+++ b/src/together/types/fine_tune_download_response.py
@@ -1,7 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-import builtins
from typing import Optional
+from typing_extensions import Literal
from .._models import BaseModel
@@ -15,6 +15,6 @@ class FineTuneDownloadResponse(BaseModel):
filename: Optional[str] = None
- object: Optional[builtins.object] = None
+ object: Optional[Literal["local"]] = None
size: Optional[int] = None
diff --git a/src/together/types/hardware_list_params.py b/src/together/types/hardware_list_params.py
new file mode 100644
index 00000000..6506655e
--- /dev/null
+++ b/src/together/types/hardware_list_params.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["HardwareListParams"]
+
+
+class HardwareListParams(TypedDict, total=False):
+ model: str
+ """Filter hardware configurations by model compatibility.
+
+ When provided, the response includes availability status for each compatible
+ configuration.
+ """
diff --git a/src/together/types/hardware_list_response.py b/src/together/types/hardware_list_response.py
new file mode 100644
index 00000000..43481726
--- /dev/null
+++ b/src/together/types/hardware_list_response.py
@@ -0,0 +1,58 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["HardwareListResponse", "Data", "DataPricing", "DataSpecs", "DataAvailability"]
+
+
+class DataPricing(BaseModel):
+ cents_per_minute: float
+ """Cost per minute of endpoint uptime in cents"""
+
+
+class DataSpecs(BaseModel):
+ gpu_count: int
+ """Number of GPUs in this configuration"""
+
+ gpu_link: str
+ """The GPU interconnect technology"""
+
+ gpu_memory: float
+ """Amount of GPU memory in GB"""
+
+ gpu_type: str
+ """The type/model of GPU"""
+
+
+class DataAvailability(BaseModel):
+ status: Literal["available", "unavailable", "insufficient"]
+ """The availability status of the hardware configuration"""
+
+
+class Data(BaseModel):
+ id: str
+ """Unique identifier for the hardware configuration"""
+
+ object: Literal["hardware"]
+
+ pricing: DataPricing
+ """Pricing details for using an endpoint"""
+
+ specs: DataSpecs
+ """Detailed specifications of a hardware configuration"""
+
+ updated_at: datetime
+ """Timestamp of when the hardware status was last updated"""
+
+ availability: Optional[DataAvailability] = None
+ """Indicates the current availability status of a hardware configuration"""
+
+
+class HardwareListResponse(BaseModel):
+ data: List[Data]
+
+ object: Literal["list"]
diff --git a/src/together/types/job_list_response.py b/src/together/types/job_list_response.py
new file mode 100644
index 00000000..11281d23
--- /dev/null
+++ b/src/together/types/job_list_response.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["JobListResponse", "Data", "DataArgs", "DataStatusUpdate"]
+
+
+class DataArgs(BaseModel):
+ description: Optional[str] = None
+
+ api_model_name: Optional[str] = FieldInfo(alias="modelName", default=None)
+
+ api_model_source: Optional[str] = FieldInfo(alias="modelSource", default=None)
+
+
+class DataStatusUpdate(BaseModel):
+ message: str
+
+ status: str
+
+ timestamp: datetime
+
+
+class Data(BaseModel):
+ args: DataArgs
+
+ created_at: datetime
+
+ job_id: str
+
+ status: Literal["Queued", "Running", "Complete", "Failed"]
+
+ status_updates: List[DataStatusUpdate]
+
+ type: str
+
+ updated_at: datetime
+
+
+class JobListResponse(BaseModel):
+ data: List[Data]
diff --git a/src/together/types/job_retrieve_response.py b/src/together/types/job_retrieve_response.py
new file mode 100644
index 00000000..ded83144
--- /dev/null
+++ b/src/together/types/job_retrieve_response.py
@@ -0,0 +1,43 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["JobRetrieveResponse", "Args", "StatusUpdate"]
+
+
+class Args(BaseModel):
+ description: Optional[str] = None
+
+ api_model_name: Optional[str] = FieldInfo(alias="modelName", default=None)
+
+ api_model_source: Optional[str] = FieldInfo(alias="modelSource", default=None)
+
+
+class StatusUpdate(BaseModel):
+ message: str
+
+ status: str
+
+ timestamp: datetime
+
+
+class JobRetrieveResponse(BaseModel):
+ args: Args
+
+ created_at: datetime
+
+ job_id: str
+
+ status: Literal["Queued", "Running", "Complete", "Failed"]
+
+ status_updates: List[StatusUpdate]
+
+ type: str
+
+ updated_at: datetime
diff --git a/src/together/types/model_upload_params.py b/src/together/types/model_upload_params.py
new file mode 100644
index 00000000..9a159405
--- /dev/null
+++ b/src/together/types/model_upload_params.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ModelUploadParams"]
+
+
+class ModelUploadParams(TypedDict, total=False):
+ model_name: Required[str]
+ """The name to give to your uploaded model"""
+
+ model_source: Required[str]
+ """The source location of the model (Hugging Face repo or S3 path)"""
+
+ base_model: str
+ """
+ The base model to use for an adapter if setting it to run against a serverless
+ pool. Only used for model_type `adapter`.
+ """
+
+ description: str
+ """A description of your model"""
+
+ hf_token: str
+ """Hugging Face token (if uploading from Hugging Face)"""
+
+ lora_model: str
+ """
+ The lora pool to use for an adapter if setting it to run against, say, a
+ dedicated pool. Only used for model_type `adapter`.
+ """
+
+ model_type: Literal["model", "adapter"]
+ """Whether the model is a full model or an adapter"""
diff --git a/src/together/types/model_upload_response.py b/src/together/types/model_upload_response.py
new file mode 100644
index 00000000..9b8d9237
--- /dev/null
+++ b/src/together/types/model_upload_response.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["ModelUploadResponse", "Data"]
+
+
+class Data(BaseModel):
+ job_id: str
+
+ api_model_id: str = FieldInfo(alias="model_id")
+
+ api_model_name: str = FieldInfo(alias="model_name")
+
+ api_model_source: str = FieldInfo(alias="model_source")
+
+
+class ModelUploadResponse(BaseModel):
+ data: Data
+
+ message: str
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 88553cec..207023a6 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ def test_method_create_overload_1(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -39,7 +39,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
context_length_exceeded_behavior="truncate",
echo=True,
frequency_penalty=0,
@@ -55,7 +55,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -88,7 +88,7 @@ def test_raw_response_create_overload_1(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.is_closed is True
@@ -105,7 +105,7 @@ def test_streaming_response_create_overload_1(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -124,7 +124,7 @@ def test_method_create_overload_2(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
completion_stream.response.close()
@@ -138,7 +138,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
context_length_exceeded_behavior="truncate",
echo=True,
@@ -155,7 +155,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -187,7 +187,7 @@ def test_raw_response_create_overload_2(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
@@ -204,7 +204,7 @@ def test_streaming_response_create_overload_2(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
) as response:
assert not response.is_closed
@@ -228,7 +228,7 @@ async def test_method_create_overload_1(self, async_client: AsyncTogether) -> No
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -241,7 +241,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
context_length_exceeded_behavior="truncate",
echo=True,
frequency_penalty=0,
@@ -257,7 +257,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -290,7 +290,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncTogether)
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.is_closed is True
@@ -307,7 +307,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncTog
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -326,7 +326,7 @@ async def test_method_create_overload_2(self, async_client: AsyncTogether) -> No
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
await completion_stream.response.aclose()
@@ -340,7 +340,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
context_length_exceeded_behavior="truncate",
echo=True,
@@ -357,7 +357,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -389,7 +389,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncTogether)
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
@@ -406,7 +406,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncTog
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/code_interpreter/__init__.py b/tests/api_resources/code_interpreter/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/code_interpreter/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/code_interpreter/test_sessions.py b/tests/api_resources/code_interpreter/test_sessions.py
new file mode 100644
index 00000000..e53d7a4a
--- /dev/null
+++ b/tests/api_resources/code_interpreter/test_sessions.py
@@ -0,0 +1,90 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types.code_interpreter import SessionListResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestSessions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ session = client.code_interpreter.sessions.list()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.code_interpreter.sessions.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.code_interpreter.sessions.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncSessions:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ session = await async_client.code_interpreter.sessions.list()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.code_interpreter.sessions.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = await response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.code_interpreter.sessions.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_code_interpreter.py b/tests/api_resources/test_code_interpreter.py
new file mode 100644
index 00000000..17c1928c
--- /dev/null
+++ b/tests/api_resources/test_code_interpreter.py
@@ -0,0 +1,146 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import ExecuteResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCodeInterpreter:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_method_execute(self, client: Together) -> None:
+ code_interpreter = client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_method_execute_with_all_params(self, client: Together) -> None:
+ code_interpreter = client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ files=[
+ {
+ "content": "content",
+ "encoding": "string",
+ "name": "name",
+ }
+ ],
+ session_id="ses_abcDEF123",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_raw_response_execute(self, client: Together) -> None:
+ response = client.code_interpreter.with_raw_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ code_interpreter = response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_streaming_response_execute(self, client: Together) -> None:
+ with client.code_interpreter.with_streaming_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ code_interpreter = response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncCodeInterpreter:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_method_execute(self, async_client: AsyncTogether) -> None:
+ code_interpreter = await async_client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_method_execute_with_all_params(self, async_client: AsyncTogether) -> None:
+ code_interpreter = await async_client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ files=[
+ {
+ "content": "content",
+ "encoding": "string",
+ "name": "name",
+ }
+ ],
+ session_id="ses_abcDEF123",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_raw_response_execute(self, async_client: AsyncTogether) -> None:
+ response = await async_client.code_interpreter.with_raw_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ code_interpreter = await response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_streaming_response_execute(self, async_client: AsyncTogether) -> None:
+ async with async_client.code_interpreter.with_streaming_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ code_interpreter = await response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py
index a81e24c5..084ad480 100644
--- a/tests/api_resources/test_embeddings.py
+++ b/tests/api_resources/test_embeddings.py
@@ -21,7 +21,7 @@ class TestEmbeddings:
def test_method_create(self, client: Together) -> None:
embedding = client.embeddings.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert_matches_type(Embedding, embedding, path=["response"])
@@ -29,7 +29,7 @@ def test_method_create(self, client: Together) -> None:
def test_raw_response_create(self, client: Together) -> None:
response = client.embeddings.with_raw_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert response.is_closed is True
@@ -41,7 +41,7 @@ def test_raw_response_create(self, client: Together) -> None:
def test_streaming_response_create(self, client: Together) -> None:
with client.embeddings.with_streaming_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -59,7 +59,7 @@ class TestAsyncEmbeddings:
async def test_method_create(self, async_client: AsyncTogether) -> None:
embedding = await async_client.embeddings.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert_matches_type(Embedding, embedding, path=["response"])
@@ -67,7 +67,7 @@ async def test_method_create(self, async_client: AsyncTogether) -> None:
async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
response = await async_client.embeddings.with_raw_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert response.is_closed is True
@@ -79,7 +79,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
async with async_client.embeddings.with_streaming_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
diff --git a/tests/api_resources/test_endpoints.py b/tests/api_resources/test_endpoints.py
new file mode 100644
index 00000000..59cbc6ab
--- /dev/null
+++ b/tests/api_resources/test_endpoints.py
@@ -0,0 +1,473 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import (
+ EndpointListResponse,
+ EndpointCreateResponse,
+ EndpointUpdateResponse,
+ EndpointRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestEndpoints:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: Together) -> None:
+ endpoint = client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: Together) -> None:
+ endpoint = client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ disable_prompt_cache=True,
+ disable_speculative_decoding=True,
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: Together) -> None:
+ endpoint = client.endpoints.retrieve(
+ "endpointId",
+ )
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.retrieve(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.retrieve(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ client.endpoints.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_update(self, client: Together) -> None:
+ endpoint = client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_method_update_with_all_params(self, client: Together) -> None:
+ endpoint = client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_update(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_update(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_update(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ client.endpoints.with_raw_response.update(
+ endpoint_id="",
+ )
+
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ endpoint = client.endpoints.list()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: Together) -> None:
+ endpoint = client.endpoints.list(
+ type="dedicated",
+ )
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_delete(self, client: Together) -> None:
+ endpoint = client.endpoints.delete(
+ "endpointId",
+ )
+ assert endpoint is None
+
+ @parametrize
+ def test_raw_response_delete(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.delete(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert endpoint is None
+
+ @parametrize
+ def test_streaming_response_delete(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.delete(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert endpoint is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ client.endpoints.with_raw_response.delete(
+ "",
+ )
+
+
+class TestAsyncEndpoints:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ disable_prompt_cache=True,
+ disable_speculative_decoding=True,
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.retrieve(
+ "endpointId",
+ )
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.retrieve(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.retrieve(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ await async_client.endpoints.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_update(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_method_update_with_all_params(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_update(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_update(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_update(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ await async_client.endpoints.with_raw_response.update(
+ endpoint_id="",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.list()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.list(
+ type="dedicated",
+ )
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.delete(
+ "endpointId",
+ )
+ assert endpoint is None
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.delete(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert endpoint is None
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.delete(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert endpoint is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ await async_client.endpoints.with_raw_response.delete(
+ "",
+ )
diff --git a/tests/api_resources/test_fine_tune.py b/tests/api_resources/test_fine_tune.py
index 8a4eda52..4545089a 100644
--- a/tests/api_resources/test_fine_tune.py
+++ b/tests/api_resources/test_fine_tune.py
@@ -22,6 +22,7 @@
class TestFineTune:
parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_method_create(self, client: Together) -> None:
fine_tune = client.fine_tune.create(
@@ -30,15 +31,17 @@ def test_method_create(self, client: Together) -> None:
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_method_create_with_all_params(self, client: Together) -> None:
fine_tune = client.fine_tune.create(
model="model",
training_file="training_file",
batch_size=0,
+ from_checkpoint="from_checkpoint",
learning_rate=0,
lr_scheduler={
- "lr_scheduler_type": "lr_scheduler_type",
+ "lr_scheduler_type": "linear",
"lr_scheduler_args": {"min_lr_ratio": 0},
},
max_grad_norm=0,
@@ -47,6 +50,7 @@ def test_method_create_with_all_params(self, client: Together) -> None:
n_evals=0,
suffix="suffix",
train_on_inputs=True,
+ training_method={"method": "sft"},
training_type={"type": "Full"},
validation_file="validation_file",
wandb_api_key="wandb_api_key",
@@ -58,6 +62,7 @@ def test_method_create_with_all_params(self, client: Together) -> None:
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_raw_response_create(self, client: Together) -> None:
response = client.fine_tune.with_raw_response.create(
@@ -70,6 +75,7 @@ def test_raw_response_create(self, client: Together) -> None:
fine_tune = response.parse()
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_streaming_response_create(self, client: Together) -> None:
with client.fine_tune.with_streaming_response.create(
@@ -268,6 +274,7 @@ def test_path_params_list_events(self, client: Together) -> None:
class TestAsyncFineTune:
parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_method_create(self, async_client: AsyncTogether) -> None:
fine_tune = await async_client.fine_tune.create(
@@ -276,15 +283,17 @@ async def test_method_create(self, async_client: AsyncTogether) -> None:
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None:
fine_tune = await async_client.fine_tune.create(
model="model",
training_file="training_file",
batch_size=0,
+ from_checkpoint="from_checkpoint",
learning_rate=0,
lr_scheduler={
- "lr_scheduler_type": "lr_scheduler_type",
+ "lr_scheduler_type": "linear",
"lr_scheduler_args": {"min_lr_ratio": 0},
},
max_grad_norm=0,
@@ -293,6 +302,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
n_evals=0,
suffix="suffix",
train_on_inputs=True,
+ training_method={"method": "sft"},
training_type={"type": "Full"},
validation_file="validation_file",
wandb_api_key="wandb_api_key",
@@ -304,6 +314,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
response = await async_client.fine_tune.with_raw_response.create(
@@ -316,6 +327,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
fine_tune = await response.parse()
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
async with async_client.fine_tune.with_streaming_response.create(
diff --git a/tests/api_resources/test_hardware.py b/tests/api_resources/test_hardware.py
new file mode 100644
index 00000000..aafe18f0
--- /dev/null
+++ b/tests/api_resources/test_hardware.py
@@ -0,0 +1,86 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import HardwareListResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestHardware:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ hardware = client.hardware.list()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: Together) -> None:
+ hardware = client.hardware.list(
+ model="model",
+ )
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.hardware.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ hardware = response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.hardware.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ hardware = response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncHardware:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ hardware = await async_client.hardware.list()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncTogether) -> None:
+ hardware = await async_client.hardware.list(
+ model="model",
+ )
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.hardware.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ hardware = await response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.hardware.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ hardware = await response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py
index 2d1dda01..0e84fb5f 100644
--- a/tests/api_resources/test_images.py
+++ b/tests/api_resources/test_images.py
@@ -21,7 +21,7 @@ class TestImages:
@parametrize
def test_method_create(self, client: Together) -> None:
image = client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
assert_matches_type(ImageFile, image, path=["response"])
@@ -30,7 +30,7 @@ def test_method_create(self, client: Together) -> None:
@parametrize
def test_method_create_with_all_params(self, client: Together) -> None:
image = client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
guidance=0,
height=0,
@@ -55,7 +55,7 @@ def test_method_create_with_all_params(self, client: Together) -> None:
@parametrize
def test_raw_response_create(self, client: Together) -> None:
response = client.images.with_raw_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
@@ -68,7 +68,7 @@ def test_raw_response_create(self, client: Together) -> None:
@parametrize
def test_streaming_response_create(self, client: Together) -> None:
with client.images.with_streaming_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
) as response:
assert not response.is_closed
@@ -87,7 +87,7 @@ class TestAsyncImages:
@parametrize
async def test_method_create(self, async_client: AsyncTogether) -> None:
image = await async_client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
assert_matches_type(ImageFile, image, path=["response"])
@@ -96,7 +96,7 @@ async def test_method_create(self, async_client: AsyncTogether) -> None:
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None:
image = await async_client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
guidance=0,
height=0,
@@ -121,7 +121,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
@parametrize
async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
response = await async_client.images.with_raw_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
@@ -134,7 +134,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
@parametrize
async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
async with async_client.images.with_streaming_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/test_jobs.py b/tests/api_resources/test_jobs.py
new file mode 100644
index 00000000..110600d7
--- /dev/null
+++ b/tests/api_resources/test_jobs.py
@@ -0,0 +1,148 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import JobListResponse, JobRetrieveResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestJobs:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: Together) -> None:
+ job = client.jobs.retrieve(
+ "jobId",
+ )
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: Together) -> None:
+ response = client.jobs.with_raw_response.retrieve(
+ "jobId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: Together) -> None:
+ with client.jobs.with_streaming_response.retrieve(
+ "jobId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ client.jobs.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ job = client.jobs.list()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.jobs.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.jobs.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncJobs:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncTogether) -> None:
+ job = await async_client.jobs.retrieve(
+ "jobId",
+ )
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncTogether) -> None:
+ response = await async_client.jobs.with_raw_response.retrieve(
+ "jobId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncTogether) -> None:
+ async with async_client.jobs.with_streaming_response.retrieve(
+ "jobId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ await async_client.jobs.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ job = await async_client.jobs.list()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.jobs.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.jobs.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index 4a0d63e8..fbf910a0 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -9,7 +9,7 @@
from together import Together, AsyncTogether
from tests.utils import assert_matches_type
-from together.types import ModelListResponse
+from together.types import ModelListResponse, ModelUploadResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -42,6 +42,53 @@ def test_streaming_response_list(self, client: Together) -> None:
assert cast(Any, response.is_closed) is True
+ @parametrize
+ def test_method_upload(self, client: Together) -> None:
+ model = client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ def test_method_upload_with_all_params(self, client: Together) -> None:
+ model = client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ base_model="Qwen/Qwen2.5-72B-Instruct",
+ description="Finetuned Qwen2.5-72B-Instruct by Unsloth",
+ hf_token="hf_examplehuggingfacetoken",
+ lora_model="my_username/Qwen2.5-72B-Instruct-lora",
+ model_type="model",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ def test_raw_response_upload(self, client: Together) -> None:
+ response = client.models.with_raw_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ def test_streaming_response_upload(self, client: Together) -> None:
+ with client.models.with_streaming_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
class TestAsyncModels:
parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
@@ -70,3 +117,50 @@ async def test_streaming_response_list(self, async_client: AsyncTogether) -> Non
assert_matches_type(ModelListResponse, model, path=["response"])
assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_upload(self, async_client: AsyncTogether) -> None:
+ model = await async_client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ async def test_method_upload_with_all_params(self, async_client: AsyncTogether) -> None:
+ model = await async_client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ base_model="Qwen/Qwen2.5-72B-Instruct",
+ description="Finetuned Qwen2.5-72B-Instruct by Unsloth",
+ hf_token="hf_examplehuggingfacetoken",
+ lora_model="my_username/Qwen2.5-72B-Instruct-lora",
+ model_type="model",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ async def test_raw_response_upload(self, async_client: AsyncTogether) -> None:
+ response = await async_client.models.with_raw_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = await response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_upload(self, async_client: AsyncTogether) -> None:
+ async with async_client.models.with_streaming_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = await response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/conftest.py b/tests/conftest.py
index 2262b6e7..b7e86792 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
from together import Together, AsyncTogether
if TYPE_CHECKING:
- from _pytest.fixtures import FixtureRequest
+ from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage]
pytest.register_assert_rewrite("tests.utils")
diff --git a/tests/test_client.py b/tests/test_client.py
index 5daf9572..b8c41bee 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -814,7 +814,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.retries_taken == failures_before_success
@@ -846,7 +846,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": Omit()},
)
@@ -878,7 +878,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": "42"},
)
@@ -1657,7 +1657,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.retries_taken == failures_before_success
@@ -1690,7 +1690,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": Omit()},
)
@@ -1723,7 +1723,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": "42"},
)
@@ -1741,7 +1741,7 @@ def test_get_platform(self) -> None:
import threading
from together._utils import asyncify
- from together._base_client import get_platform
+ from together._base_client import get_platform
async def test_main() -> None:
result = await asyncify(get_platform)()
diff --git a/tests/test_models.py b/tests/test_models.py
index 6fbb96a3..da728846 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -492,12 +492,15 @@ class Model(BaseModel):
resource_id: Optional[str] = None
m = Model.construct()
+ assert m.resource_id is None
assert "resource_id" not in m.model_fields_set
m = Model.construct(resource_id=None)
+ assert m.resource_id is None
assert "resource_id" in m.model_fields_set
m = Model.construct(resource_id="foo")
+ assert m.resource_id == "foo"
assert "resource_id" in m.model_fields_set
@@ -832,7 +835,7 @@ class B(BaseModel):
@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1")
def test_type_alias_type() -> None:
- Alias = TypeAliasType("Alias", str)
+ Alias = TypeAliasType("Alias", str) # pyright: ignore
class Model(BaseModel):
alias: Alias
@@ -854,3 +857,35 @@ class Model(BaseModel):
m = construct_type(value={"cls": "foo"}, type_=Model)
assert isinstance(m, Model)
assert isinstance(m.cls, str)
+
+
+def test_discriminated_union_case() -> None:
+ class A(BaseModel):
+ type: Literal["a"]
+
+ data: bool
+
+ class B(BaseModel):
+ type: Literal["b"]
+
+ data: List[Union[A, object]]
+
+ class ModelA(BaseModel):
+ type: Literal["modelA"]
+
+ data: int
+
+ class ModelB(BaseModel):
+ type: Literal["modelB"]
+
+ required: str
+
+ data: Union[A, B]
+
+ # when constructing ModelA | ModelB, value data doesn't match ModelB exactly - missing `required`
+ m = construct_type(
+ value={"type": "modelB", "data": {"type": "a", "data": True}},
+ type_=cast(Any, Annotated[Union[ModelA, ModelB], PropertyInfo(discriminator="type")]),
+ )
+
+ assert isinstance(m, ModelB)
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 3dda603b..2a34b4f7 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -8,7 +8,7 @@
import pytest
-from together._types import Base64FileInput
+from together._types import NOT_GIVEN, Base64FileInput
from together._utils import (
PropertyInfo,
transform as _transform,
@@ -432,3 +432,22 @@ async def test_base64_file_input(use_async: bool) -> None:
assert await transform({"foo": io.BytesIO(b"Hello, world!")}, TypedDictBase64Input, use_async) == {
"foo": "SGVsbG8sIHdvcmxkIQ=="
} # type: ignore[comparison-overlap]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_transform_skipping(use_async: bool) -> None:
+ # lists of ints are left as-is
+ data = [1, 2, 3]
+ assert await transform(data, List[int], use_async) is data
+
+ # iterables of ints are converted to a list
+ data = iter([1, 2, 3])
+ assert await transform(data, Iterable[int], use_async) == [1, 2, 3]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_strips_notgiven(use_async: bool) -> None:
+ assert await transform({"foo_bar": "bar"}, Foo1, use_async) == {"fooBar": "bar"}
+ assert await transform({"foo_bar": NOT_GIVEN}, Foo1, use_async) == {}
diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py
index 75a517dd..7c25ecc7 100644
--- a/tests/test_utils/test_proxy.py
+++ b/tests/test_utils/test_proxy.py
@@ -21,3 +21,14 @@ def test_recursive_proxy() -> None:
assert dir(proxy) == []
assert type(proxy).__name__ == "RecursiveLazyProxy"
assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy"
+
+
+def test_isinstance_does_not_error() -> None:
+ class AlwaysErrorProxy(LazyProxy[Any]):
+ @override
+ def __load__(self) -> Any:
+ raise RuntimeError("Mocking missing dependency")
+
+ proxy = AlwaysErrorProxy()
+ assert not isinstance(proxy, dict)
+ assert isinstance(proxy, LazyProxy)