diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index 55d20255..ff261bad 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
USER vscode
-RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash
+RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.44.0" RYE_INSTALL_OPTION="--yes" bash
ENV PATH=/home/vscode/.rye/shims:$PATH
RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c8a8a4f7..a7180a90 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,18 +1,18 @@
name: CI
on:
push:
- branches:
- - main
- pull_request:
- branches:
- - main
- - next
+ branches-ignore:
+ - 'generated'
+ - 'codegen/**'
+ - 'integrated/**'
+ - 'stl-preview-head/**'
+ - 'stl-preview-base/**'
jobs:
lint:
+ timeout-minutes: 10
name: lint
- runs-on: ubuntu-latest
-
+ runs-on: ${{ github.repository == 'stainless-sdks/togetherai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
steps:
- uses: actions/checkout@v4
@@ -21,7 +21,7 @@ jobs:
curl -sSf https://rye.astral.sh/get | bash
echo "$HOME/.rye/shims" >> $GITHUB_PATH
env:
- RYE_VERSION: '0.35.0'
+ RYE_VERSION: '0.44.0'
RYE_INSTALL_OPTION: '--yes'
- name: Install dependencies
@@ -31,9 +31,9 @@ jobs:
run: ./scripts/lint
test:
+ timeout-minutes: 10
name: test
- runs-on: ubuntu-latest
-
+ runs-on: ${{ github.repository == 'stainless-sdks/togetherai-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }}
steps:
- uses: actions/checkout@v4
@@ -42,7 +42,7 @@ jobs:
curl -sSf https://rye.astral.sh/get | bash
echo "$HOME/.rye/shims" >> $GITHUB_PATH
env:
- RYE_VERSION: '0.35.0'
+ RYE_VERSION: '0.44.0'
RYE_INSTALL_OPTION: '--yes'
- name: Bootstrap
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index fb499434..41ab0074 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -21,7 +21,7 @@ jobs:
curl -sSf https://rye.astral.sh/get | bash
echo "$HOME/.rye/shims" >> $GITHUB_PATH
env:
- RYE_VERSION: '0.35.0'
+ RYE_VERSION: '0.44.0'
RYE_INSTALL_OPTION: '--yes'
- name: Publish to PyPI
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index aaf968a1..b56c3d0b 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.1.0-alpha.3"
+ ".": "0.1.0-alpha.4"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index a38bcf77..03c953a6 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,4 @@
-configured_endpoints: 17
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-a11987df1895448b6ccbbc6d931e7db9a0dc3e6de7c6efb237ac9548342e616b.yml
+configured_endpoints: 28
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-8f50cb3e342f2fd67f1f2cfda195b3d78c0740344f55f37cf1c99c66a0f7c2c5.yml
+openapi_spec_hash: b9907745f73f337395ffd5cef1e8a2d5
+config_hash: a60b100624e80dc8d9144e7bc306f5ce
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d57805cf..ffdc48d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,70 @@
# Changelog
+## 0.1.0-alpha.4 (2025-05-13)
+
+Full Changelog: [v0.1.0-alpha.3...v0.1.0-alpha.4](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.3...v0.1.0-alpha.4)
+
+### Features
+
+* **api:** add tci resources ([023b3a0](https://github.com/togethercomputer/together-py/commit/023b3a00991729a0a06845ee7f64f760cf6f4325))
+* **api:** adds unspecified endpoints ([678f58a](https://github.com/togethercomputer/together-py/commit/678f58af8b2be9e65b667cb0b104a9be4b6667f4))
+* **api:** api update ([6d9609e](https://github.com/togethercomputer/together-py/commit/6d9609e279d228ba1aad926914d089904b858c01))
+* **api:** api update ([bb40eb9](https://github.com/togethercomputer/together-py/commit/bb40eb96cbf911f0f9772c98e261988ab1432383))
+* **api:** api update ([271887f](https://github.com/togethercomputer/together-py/commit/271887fe30d8f4b8f0405d16366e1f82868a1d0d))
+* **api:** api update ([2a7de06](https://github.com/togethercomputer/together-py/commit/2a7de06a3a1b5425a1dd553c32390df21b252e21))
+* **api:** api update ([#117](https://github.com/togethercomputer/together-py/issues/117)) ([dd8e841](https://github.com/togethercomputer/together-py/commit/dd8e841d1eaf40a9f143f63f7f4ced0f701b0fbd))
+* **api:** api update ([#120](https://github.com/togethercomputer/together-py/issues/120)) ([adf0e5b](https://github.com/togethercomputer/together-py/commit/adf0e5ba1cd266278cf4503b04cfcd847a97b0e4))
+* **api:** api update ([#121](https://github.com/togethercomputer/together-py/issues/121)) ([0ab0bc9](https://github.com/togethercomputer/together-py/commit/0ab0bc97ca4db4d2d64f3c2f9eeada9ffa37fc97))
+* **api:** api update ([#130](https://github.com/togethercomputer/together-py/issues/130)) ([4f1a7ea](https://github.com/togethercomputer/together-py/commit/4f1a7ea708c55466f4fa3f1698b505ffbfe2aea6))
+* **api:** api update ([#132](https://github.com/togethercomputer/together-py/issues/132)) ([7c8a194](https://github.com/togethercomputer/together-py/commit/7c8a194c4e1f484f8455adce6f56c840411f4946))
+* **api:** api update ([#135](https://github.com/togethercomputer/together-py/issues/135)) ([22a93e9](https://github.com/togethercomputer/together-py/commit/22a93e9c5c7a33c502f5a4c380c576c2a752d6a5))
+
+
+### Bug Fixes
+
+* **ci:** ensure pip is always available ([#127](https://github.com/togethercomputer/together-py/issues/127)) ([4da2bc0](https://github.com/togethercomputer/together-py/commit/4da2bc0bb7cc4516cf0d93032544fbb71025c118))
+* **ci:** remove publishing patch ([#128](https://github.com/togethercomputer/together-py/issues/128)) ([6bd4d6f](https://github.com/togethercomputer/together-py/commit/6bd4d6f8d8f8842f56cdbb56df0a4d5e5227dde4))
+* **client:** correct type to enum ([#129](https://github.com/togethercomputer/together-py/issues/129)) ([8a5fa0e](https://github.com/togethercomputer/together-py/commit/8a5fa0e2858e851756f022943ada948374bb017c))
+* **package:** support direct resource imports ([f59e7c3](https://github.com/togethercomputer/together-py/commit/f59e7c3b3bcc7c076bd8c71b2ab42f8a117e5519))
+* **perf:** optimize some hot paths ([f79734d](https://github.com/togethercomputer/together-py/commit/f79734d809a4a7c18eb8903190e6b4d90d299e45))
+* **perf:** skip traversing types for NotGiven values ([1103dd0](https://github.com/togethercomputer/together-py/commit/1103dd03e7f021deadd0b000b3bff9c5494442b6))
+* **pydantic v1:** more robust ModelField.annotation check ([d380238](https://github.com/togethercomputer/together-py/commit/d3802383e80ad8d3606a1e753c72a20864531332))
+* skip invalid fine-tune test ([#133](https://github.com/togethercomputer/together-py/issues/133)) ([2f41046](https://github.com/togethercomputer/together-py/commit/2f4104625264947305cee0bd26fc38ff290f16ea))
+* **tests:** correctly skip create fine tune tests ([#138](https://github.com/togethercomputer/together-py/issues/138)) ([47c9cae](https://github.com/togethercomputer/together-py/commit/47c9cae7da9caee8de3ba7480b784fc5d168e1b0))
+* **types:** handle more discriminated union shapes ([#126](https://github.com/togethercomputer/together-py/issues/126)) ([2483c76](https://github.com/togethercomputer/together-py/commit/2483c76ee0cf06ee7a1819446cfa4fa349958da4))
+
+
+### Chores
+
+* broadly detect json family of content-type headers ([6e2421e](https://github.com/togethercomputer/together-py/commit/6e2421e126e74b4bcc7bc2aaef07a078bdd1e0ea))
+* **ci:** add timeout thresholds for CI jobs ([2425c53](https://github.com/togethercomputer/together-py/commit/2425c53723d34959380d44131d607ded5a665004))
+* **ci:** only use depot for staging repos ([2dfe569](https://github.com/togethercomputer/together-py/commit/2dfe569cf72f74a97fbe1e282c9d079c371d32aa))
+* **ci:** run on more branches and use depot runners ([3c61f56](https://github.com/togethercomputer/together-py/commit/3c61f565633c395dba16fda924c241910145c13c))
+* **client:** minor internal fixes ([f6f5174](https://github.com/togethercomputer/together-py/commit/f6f5174c6ec0b9a3a4decfc25737efbbb52bffe5))
+* fix typos ([#131](https://github.com/togethercomputer/together-py/issues/131)) ([dedf3ad](https://github.com/togethercomputer/together-py/commit/dedf3adb709255ba9303e29354b013db8a8520b9))
+* **internal:** avoid errors for isinstance checks on proxies ([8b81509](https://github.com/togethercomputer/together-py/commit/8b81509faac153ee4a33b3460c17759e2465dfcd))
+* **internal:** base client updates ([890efc3](https://github.com/togethercomputer/together-py/commit/890efc36f00553025237601bad51f3f0a906376b))
+* **internal:** bump pyright version ([01e104a](https://github.com/togethercomputer/together-py/commit/01e104a2bba92c77ef610cf48720d8a2785ff39b))
+* **internal:** bump rye to 0.44.0 ([#124](https://github.com/togethercomputer/together-py/issues/124)) ([e8c3dc3](https://github.com/togethercomputer/together-py/commit/e8c3dc3be0e56d7c4e7a48d8f824a88878e0c981))
+* **internal:** codegen related update ([#125](https://github.com/togethercomputer/together-py/issues/125)) ([5e83e04](https://github.com/togethercomputer/together-py/commit/5e83e043b3f62c38fa13c72d54278e845c2df46a))
+* **internal:** expand CI branch coverage ([#139](https://github.com/togethercomputer/together-py/issues/139)) ([2db8ca2](https://github.com/togethercomputer/together-py/commit/2db8ca2b6d063b136e9cb50c3991a11f6f47e4fb))
+* **internal:** fix list file params ([8a8dcd3](https://github.com/togethercomputer/together-py/commit/8a8dcd384e480c52358460ba662a48311a415cfb))
+* **internal:** import reformatting ([49f361b](https://github.com/togethercomputer/together-py/commit/49f361bf9d548ca45a01e31972b5db797752e481))
+* **internal:** minor formatting changes ([33e3a75](https://github.com/togethercomputer/together-py/commit/33e3a751bd9f3382e5e462bbcf92a212e14d26ff))
+* **internal:** reduce CI branch coverage ([6f6ac97](https://github.com/togethercomputer/together-py/commit/6f6ac973e36bdeb28883ff6281228c67f76c55a1))
+* **internal:** refactor retries to not use recursion ([ffb0eb4](https://github.com/togethercomputer/together-py/commit/ffb0eb46712544a86f01eaa842ac13f085e37fee))
+* **internal:** remove extra empty newlines ([#122](https://github.com/togethercomputer/together-py/issues/122)) ([b0cbbaa](https://github.com/togethercomputer/together-py/commit/b0cbbaa10e003e84cf2c8c23ef05baa6bc9d4e82))
+* **internal:** remove trailing character ([#134](https://github.com/togethercomputer/together-py/issues/134)) ([f09c6cb](https://github.com/togethercomputer/together-py/commit/f09c6cb1620997e72b99bc918d77ae9a2be9e8b3))
+* **internal:** slight transform perf improvement ([#136](https://github.com/togethercomputer/together-py/issues/136)) ([d31383c](https://github.com/togethercomputer/together-py/commit/d31383c0f8fb1749381fad871aa60bd0eaad3e03))
+* **internal:** update models test ([b64d4cc](https://github.com/togethercomputer/together-py/commit/b64d4cc9a1424fa7f46088e51306b877afba3fae))
+* **internal:** update pyright settings ([05720d5](https://github.com/togethercomputer/together-py/commit/05720d5b0b7387fbe3b04975dfa6b764898a7a02))
+* **tests:** improve enum examples ([#137](https://github.com/togethercomputer/together-py/issues/137)) ([4c3e75d](https://github.com/togethercomputer/together-py/commit/4c3e75d5aa75421d4aca257c0df89d24e2db264e))
+
+
+### Documentation
+
+* revise readme docs about nested params ([#118](https://github.com/togethercomputer/together-py/issues/118)) ([0eefffd](https://github.com/togethercomputer/together-py/commit/0eefffd623bc692f2e03fd299b9b05c3bb88bf53))
+
## 0.1.0-alpha.3 (2025-03-05)
Full Changelog: [v0.1.0-alpha.2...v0.1.0-alpha.3](https://github.com/togethercomputer/together-py/compare/v0.1.0-alpha.2...v0.1.0-alpha.3)
diff --git a/README.md b/README.md
index 1c6e44f5..fa7c96af 100644
--- a/README.md
+++ b/README.md
@@ -136,6 +136,31 @@ Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typ
Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`.
+## Nested params
+
+Nested parameters are dictionaries, typed using `TypedDict`, for example:
+
+```python
+from together import Together
+
+client = Together()
+
+chat_completion = client.chat.completions.create(
+ messages=[
+ {
+ "content": "string",
+ "role": "system",
+ }
+ ],
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+ response_format={
+ "schema": {"foo": "bar"},
+ "type": "json",
+ },
+)
+print(chat_completion.response_format)
+```
+
## Handling errors
When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `together.APIConnectionError` is raised.
diff --git a/api.md b/api.md
index 079efc3b..d1cb335c 100644
--- a/api.md
+++ b/api.md
@@ -86,6 +86,30 @@ Methods:
- client.fine_tune.download(\*\*params) -> FineTuneDownloadResponse
- client.fine_tune.list_events(id) -> FineTuneEvent
+# CodeInterpreter
+
+Types:
+
+```python
+from together.types import ExecuteResponse
+```
+
+Methods:
+
+- client.code_interpreter.execute(\*\*params) -> ExecuteResponse
+
+## Sessions
+
+Types:
+
+```python
+from together.types.code_interpreter import SessionListResponse
+```
+
+Methods:
+
+- client.code_interpreter.sessions.list() -> SessionListResponse
+
# Images
Types:
@@ -115,9 +139,56 @@ Methods:
Types:
```python
-from together.types import ModelListResponse
+from together.types import ModelListResponse, ModelUploadResponse
```
Methods:
- client.models.list() -> ModelListResponse
+- client.models.upload(\*\*params) -> ModelUploadResponse
+
+# Jobs
+
+Types:
+
+```python
+from together.types import JobRetrieveResponse, JobListResponse
+```
+
+Methods:
+
+- client.jobs.retrieve(job_id) -> JobRetrieveResponse
+- client.jobs.list() -> JobListResponse
+
+# Endpoints
+
+Types:
+
+```python
+from together.types import (
+ EndpointCreateResponse,
+ EndpointRetrieveResponse,
+ EndpointUpdateResponse,
+ EndpointListResponse,
+)
+```
+
+Methods:
+
+- client.endpoints.create(\*\*params) -> EndpointCreateResponse
+- client.endpoints.retrieve(endpoint_id) -> EndpointRetrieveResponse
+- client.endpoints.update(endpoint_id, \*\*params) -> EndpointUpdateResponse
+- client.endpoints.list(\*\*params) -> EndpointListResponse
+- client.endpoints.delete(endpoint_id) -> None
+
+# Hardware
+
+Types:
+
+```python
+from together.types import HardwareListResponse
+```
+
+Methods:
+
+- client.hardware.list(\*\*params) -> HardwareListResponse
diff --git a/bin/publish-pypi b/bin/publish-pypi
index 05bfccbb..826054e9 100644
--- a/bin/publish-pypi
+++ b/bin/publish-pypi
@@ -3,7 +3,4 @@
set -eux
mkdir -p dist
rye build --clean
-# Patching importlib-metadata version until upstream library version is updated
-# https://github.com/pypa/twine/issues/977#issuecomment-2189800841
-"$HOME/.rye/self/bin/python3" -m pip install 'importlib-metadata==7.2.1'
rye publish --yes --token=$PYPI_TOKEN
diff --git a/pyproject.toml b/pyproject.toml
index 6abb7a59..943b01f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "together"
-version = "0.1.0-alpha.3"
+version = "0.1.0-alpha.4"
description = "The official Python library for the together API"
dynamic = ["readme"]
license = "Apache-2.0"
@@ -50,7 +50,7 @@ together = "together.lib.cli.cli:main"
managed = true
# version pins are in requirements-dev.lock
dev-dependencies = [
- "pyright>=1.1.359",
+ "pyright==1.1.399",
"mypy",
"respx",
"pytest",
@@ -94,7 +94,7 @@ typecheck = { chain = [
"typecheck:mypy" = "mypy ."
[build-system]
-requires = ["hatchling", "hatch-fancy-pypi-readme"]
+requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"]
build-backend = "hatchling.build"
[tool.hatch.build]
@@ -155,11 +155,11 @@ exclude = [
]
reportImplicitOverride = true
+reportOverlappingOverload = false
reportImportCycles = false
reportPrivateUsage = false
-
[tool.ruff]
line-length = 120
output-format = "grouped"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 76cd6127..addb0f1e 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -7,6 +7,7 @@
# all-features: true
# with-sources: false
# generate-hashes: false
+# universal: false
-e file:.
annotated-types==0.6.0
@@ -80,7 +81,7 @@ pydantic-core==2.27.1
# via pydantic
pygments==2.18.0
# via rich
-pyright==1.1.392.post0
+pyright==1.1.399
pytest==8.3.3
# via pytest-asyncio
pytest-asyncio==0.24.0
diff --git a/requirements.lock b/requirements.lock
index 04389e74..1d8f9198 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -7,6 +7,7 @@
# all-features: true
# with-sources: false
# generate-hashes: false
+# universal: false
-e file:.
annotated-types==0.6.0
diff --git a/scripts/test b/scripts/test
index 4fa5698b..2b878456 100755
--- a/scripts/test
+++ b/scripts/test
@@ -52,6 +52,8 @@ else
echo
fi
+export DEFER_PYDANTIC_BUILD=false
+
echo "==> Running tests"
rye run pytest "$@"
diff --git a/src/together/__init__.py b/src/together/__init__.py
index bbaaebad..8adebcd9 100644
--- a/src/together/__init__.py
+++ b/src/together/__init__.py
@@ -1,5 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+import typing as _t
+
from . import types
from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes
from ._utils import file_from_path
@@ -78,6 +80,9 @@
"DefaultAsyncHttpxClient",
]
+if not _t.TYPE_CHECKING:
+ from ._utils._resources_proxy import resources as resources
+
_setup_logging()
# Update the __module__ attribute for exported symbols so that
diff --git a/src/together/_base_client.py b/src/together/_base_client.py
index 2e2c8939..ee2f5115 100644
--- a/src/together/_base_client.py
+++ b/src/together/_base_client.py
@@ -98,7 +98,11 @@
_AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any])
if TYPE_CHECKING:
- from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+ from httpx._config import (
+ DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage]
+ )
+
+ HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG
else:
try:
from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
@@ -115,6 +119,7 @@ class PageInfo:
url: URL | NotGiven
params: Query | NotGiven
+ json: Body | NotGiven
@overload
def __init__(
@@ -130,19 +135,30 @@ def __init__(
params: Query,
) -> None: ...
+ @overload
+ def __init__(
+ self,
+ *,
+ json: Body,
+ ) -> None: ...
+
def __init__(
self,
*,
url: URL | NotGiven = NOT_GIVEN,
+ json: Body | NotGiven = NOT_GIVEN,
params: Query | NotGiven = NOT_GIVEN,
) -> None:
self.url = url
+ self.json = json
self.params = params
@override
def __repr__(self) -> str:
if self.url:
return f"{self.__class__.__name__}(url={self.url})"
+ if self.json:
+ return f"{self.__class__.__name__}(json={self.json})"
return f"{self.__class__.__name__}(params={self.params})"
@@ -191,6 +207,19 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions:
options.url = str(url)
return options
+ if not isinstance(info.json, NotGiven):
+ if not is_mapping(info.json):
+ raise TypeError("Pagination is only supported with mappings")
+
+ if not options.json_data:
+ options.json_data = {**info.json}
+ else:
+ if not is_mapping(options.json_data):
+ raise TypeError("Pagination is only supported with mappings")
+
+ options.json_data = {**options.json_data, **info.json}
+ return options
+
raise ValueError("Unexpected PageInfo state")
@@ -408,8 +437,8 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0
headers = httpx.Headers(headers_dict)
idempotency_header = self._idempotency_header
- if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
- headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
+ if idempotency_header and options.idempotency_key and idempotency_header not in headers:
+ headers[idempotency_header] = options.idempotency_key
# Don't set these headers if they were already set or removed by the caller. We check
# `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
@@ -873,7 +902,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[True],
stream_cls: Type[_StreamT],
@@ -884,7 +912,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[False] = False,
) -> ResponseT: ...
@@ -894,7 +921,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: Type[_StreamT] | None = None,
@@ -904,121 +930,109 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: type[_StreamT] | None = None,
) -> ResponseT | _StreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
+ cast_to = self._maybe_override_cast_to(cast_to, options)
- def _request(
- self,
- *,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- retries_taken: int,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = self._prepare_options(options)
-
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- self._prepare_request(request)
-
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- log.debug("Sending HTTP Request: %s %s", request.method, request.url)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = self._prepare_options(options)
- try:
- response = self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ self._prepare_request(request)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
-
- log.debug(
- 'HTTP Response: %s %s "%i %s" %s',
- request.method,
- request.url,
- response.status_code,
- response.reason_phrase,
- response.headers,
- )
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ err.response.close()
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- err.response.close()
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ err.response.read()
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- err.response.read()
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return self._process_response(
cast_to=cast_to,
options=options,
@@ -1028,37 +1042,20 @@ def _request(
retries_taken=retries_taken,
)
- def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
- # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
- # different thread if necessary.
time.sleep(timeout)
- return self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
def _process_response(
self,
*,
@@ -1402,7 +1399,6 @@ async def request(
options: FinalRequestOptions,
*,
stream: Literal[False] = False,
- remaining_retries: Optional[int] = None,
) -> ResponseT: ...
@overload
@@ -1413,7 +1409,6 @@ async def request(
*,
stream: Literal[True],
stream_cls: type[_AsyncStreamT],
- remaining_retries: Optional[int] = None,
) -> _AsyncStreamT: ...
@overload
@@ -1424,7 +1419,6 @@ async def request(
*,
stream: bool,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
) -> ResponseT | _AsyncStreamT: ...
async def request(
@@ -1434,116 +1428,111 @@ async def request(
*,
stream: bool = False,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
- ) -> ResponseT | _AsyncStreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return await self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
-
- async def _request(
- self,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- *,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- retries_taken: int,
) -> ResponseT | _AsyncStreamT:
if self._platform is None:
# `get_platform` can make blocking IO calls so we
# execute it earlier while we are in an async context
self._platform = await asyncify(get_platform)()
+ cast_to = self._maybe_override_cast_to(cast_to, options)
+
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = await self._prepare_options(options)
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- await self._prepare_request(request)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = await self._prepare_options(options)
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ await self._prepare_request(request)
- try:
- response = await self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
-
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = await self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ await err.response.aclose()
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- log.debug(
- 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ await err.response.aread()
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- await err.response.aclose()
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- await err.response.aread()
-
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return await self._process_response(
cast_to=cast_to,
options=options,
@@ -1553,35 +1542,20 @@ async def _request(
retries_taken=retries_taken,
)
- async def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- ) -> ResponseT | _AsyncStreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ async def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
await anyio.sleep(timeout)
- return await self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
async def _process_response(
self,
*,
diff --git a/src/together/_client.py b/src/together/_client.py
index a37edcc1..f2e64aed 100644
--- a/src/together/_client.py
+++ b/src/together/_client.py
@@ -36,7 +36,7 @@
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from .resources import audio, files, images, models, fine_tune, embeddings, completions
+from .resources import jobs, audio, files, images, models, hardware, endpoints, fine_tune, embeddings, completions
from ._streaming import Stream as Stream, AsyncStream as AsyncStream
from ._exceptions import TogetherError, APIStatusError
from ._base_client import (
@@ -47,6 +47,7 @@
)
from .resources.chat import chat
from .types.rerank_response import RerankResponse
+from .resources.code_interpreter import code_interpreter
__all__ = [
"Timeout",
@@ -66,9 +67,13 @@ class Together(SyncAPIClient):
embeddings: embeddings.EmbeddingsResource
files: files.FilesResource
fine_tune: fine_tune.FineTuneResource
+ code_interpreter: code_interpreter.CodeInterpreterResource
images: images.ImagesResource
audio: audio.AudioResource
models: models.ModelsResource
+ jobs: jobs.JobsResource
+ endpoints: endpoints.EndpointsResource
+ hardware: hardware.HardwareResource
with_raw_response: TogetherWithRawResponse
with_streaming_response: TogetherWithStreamedResponse
@@ -133,9 +138,13 @@ def __init__(
self.embeddings = embeddings.EmbeddingsResource(self)
self.files = files.FilesResource(self)
self.fine_tune = fine_tune.FineTuneResource(self)
+ self.code_interpreter = code_interpreter.CodeInterpreterResource(self)
self.images = images.ImagesResource(self)
self.audio = audio.AudioResource(self)
self.models = models.ModelsResource(self)
+ self.jobs = jobs.JobsResource(self)
+ self.endpoints = endpoints.EndpointsResource(self)
+ self.hardware = hardware.HardwareResource(self)
self.with_raw_response = TogetherWithRawResponse(self)
self.with_streaming_response = TogetherWithStreamedResponse(self)
@@ -312,9 +321,13 @@ class AsyncTogether(AsyncAPIClient):
embeddings: embeddings.AsyncEmbeddingsResource
files: files.AsyncFilesResource
fine_tune: fine_tune.AsyncFineTuneResource
+ code_interpreter: code_interpreter.AsyncCodeInterpreterResource
images: images.AsyncImagesResource
audio: audio.AsyncAudioResource
models: models.AsyncModelsResource
+ jobs: jobs.AsyncJobsResource
+ endpoints: endpoints.AsyncEndpointsResource
+ hardware: hardware.AsyncHardwareResource
with_raw_response: AsyncTogetherWithRawResponse
with_streaming_response: AsyncTogetherWithStreamedResponse
@@ -379,9 +392,13 @@ def __init__(
self.embeddings = embeddings.AsyncEmbeddingsResource(self)
self.files = files.AsyncFilesResource(self)
self.fine_tune = fine_tune.AsyncFineTuneResource(self)
+ self.code_interpreter = code_interpreter.AsyncCodeInterpreterResource(self)
self.images = images.AsyncImagesResource(self)
self.audio = audio.AsyncAudioResource(self)
self.models = models.AsyncModelsResource(self)
+ self.jobs = jobs.AsyncJobsResource(self)
+ self.endpoints = endpoints.AsyncEndpointsResource(self)
+ self.hardware = hardware.AsyncHardwareResource(self)
self.with_raw_response = AsyncTogetherWithRawResponse(self)
self.with_streaming_response = AsyncTogetherWithStreamedResponse(self)
@@ -559,9 +576,13 @@ def __init__(self, client: Together) -> None:
self.embeddings = embeddings.EmbeddingsResourceWithRawResponse(client.embeddings)
self.files = files.FilesResourceWithRawResponse(client.files)
self.fine_tune = fine_tune.FineTuneResourceWithRawResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.CodeInterpreterResourceWithRawResponse(client.code_interpreter)
self.images = images.ImagesResourceWithRawResponse(client.images)
self.audio = audio.AudioResourceWithRawResponse(client.audio)
self.models = models.ModelsResourceWithRawResponse(client.models)
+ self.jobs = jobs.JobsResourceWithRawResponse(client.jobs)
+ self.endpoints = endpoints.EndpointsResourceWithRawResponse(client.endpoints)
+ self.hardware = hardware.HardwareResourceWithRawResponse(client.hardware)
self.rerank = to_raw_response_wrapper(
client.rerank,
@@ -575,9 +596,13 @@ def __init__(self, client: AsyncTogether) -> None:
self.embeddings = embeddings.AsyncEmbeddingsResourceWithRawResponse(client.embeddings)
self.files = files.AsyncFilesResourceWithRawResponse(client.files)
self.fine_tune = fine_tune.AsyncFineTuneResourceWithRawResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.AsyncCodeInterpreterResourceWithRawResponse(client.code_interpreter)
self.images = images.AsyncImagesResourceWithRawResponse(client.images)
self.audio = audio.AsyncAudioResourceWithRawResponse(client.audio)
self.models = models.AsyncModelsResourceWithRawResponse(client.models)
+ self.jobs = jobs.AsyncJobsResourceWithRawResponse(client.jobs)
+ self.endpoints = endpoints.AsyncEndpointsResourceWithRawResponse(client.endpoints)
+ self.hardware = hardware.AsyncHardwareResourceWithRawResponse(client.hardware)
self.rerank = async_to_raw_response_wrapper(
client.rerank,
@@ -591,9 +616,13 @@ def __init__(self, client: Together) -> None:
self.embeddings = embeddings.EmbeddingsResourceWithStreamingResponse(client.embeddings)
self.files = files.FilesResourceWithStreamingResponse(client.files)
self.fine_tune = fine_tune.FineTuneResourceWithStreamingResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.CodeInterpreterResourceWithStreamingResponse(client.code_interpreter)
self.images = images.ImagesResourceWithStreamingResponse(client.images)
self.audio = audio.AudioResourceWithStreamingResponse(client.audio)
self.models = models.ModelsResourceWithStreamingResponse(client.models)
+ self.jobs = jobs.JobsResourceWithStreamingResponse(client.jobs)
+ self.endpoints = endpoints.EndpointsResourceWithStreamingResponse(client.endpoints)
+ self.hardware = hardware.HardwareResourceWithStreamingResponse(client.hardware)
self.rerank = to_streamed_response_wrapper(
client.rerank,
@@ -607,9 +636,15 @@ def __init__(self, client: AsyncTogether) -> None:
self.embeddings = embeddings.AsyncEmbeddingsResourceWithStreamingResponse(client.embeddings)
self.files = files.AsyncFilesResourceWithStreamingResponse(client.files)
self.fine_tune = fine_tune.AsyncFineTuneResourceWithStreamingResponse(client.fine_tune)
+ self.code_interpreter = code_interpreter.AsyncCodeInterpreterResourceWithStreamingResponse(
+ client.code_interpreter
+ )
self.images = images.AsyncImagesResourceWithStreamingResponse(client.images)
self.audio = audio.AsyncAudioResourceWithStreamingResponse(client.audio)
self.models = models.AsyncModelsResourceWithStreamingResponse(client.models)
+ self.jobs = jobs.AsyncJobsResourceWithStreamingResponse(client.jobs)
+ self.endpoints = endpoints.AsyncEndpointsResourceWithStreamingResponse(client.endpoints)
+ self.hardware = hardware.AsyncHardwareResourceWithStreamingResponse(client.hardware)
self.rerank = async_to_streamed_response_wrapper(
client.rerank,
diff --git a/src/together/_models.py b/src/together/_models.py
index c4401ff8..798956f1 100644
--- a/src/together/_models.py
+++ b/src/together/_models.py
@@ -19,7 +19,6 @@
)
import pydantic
-import pydantic.generics
from pydantic.fields import FieldInfo
from ._types import (
@@ -65,7 +64,7 @@
from ._constants import RAW_RESPONSE_HEADER
if TYPE_CHECKING:
- from pydantic_core.core_schema import ModelField, LiteralSchema, ModelFieldsSchema
+ from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema
__all__ = ["BaseModel", "GenericModel"]
@@ -627,8 +626,8 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any,
# Note: if one variant defines an alias then they all should
discriminator_alias = field_info.alias
- if field_info.annotation and is_literal_type(field_info.annotation):
- for entry in get_args(field_info.annotation):
+ if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation):
+ for entry in get_args(annotation):
if isinstance(entry, str):
mapping[entry] = variant
@@ -646,15 +645,18 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any,
def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None:
schema = model.__pydantic_core_schema__
+ if schema["type"] == "definitions":
+ schema = schema["schema"]
+
if schema["type"] != "model":
return None
+ schema = cast("ModelSchema", schema)
fields_schema = schema["schema"]
if fields_schema["type"] != "model-fields":
return None
fields_schema = cast("ModelFieldsSchema", fields_schema)
-
field = fields_schema["fields"].get(field_name)
if not field:
return None
@@ -678,7 +680,7 @@ def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None:
setattr(typ, "__pydantic_config__", config) # noqa: B010
-# our use of subclasssing here causes weirdness for type checkers,
+# our use of subclassing here causes weirdness for type checkers,
# so we just pretend that we don't subclass
if TYPE_CHECKING:
GenericModel = BaseModel
diff --git a/src/together/_response.py b/src/together/_response.py
index fb38392e..7188c3e4 100644
--- a/src/together/_response.py
+++ b/src/together/_response.py
@@ -233,7 +233,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
# split is required to handle cases where additional information is included
# in the response, e.g. application/json; charset=utf-8
content_type, *_ = response.headers.get("content-type", "*").split(";")
- if content_type != "application/json":
+ if not content_type.endswith("json"):
if is_basemodel(cast_to):
try:
data = response.json()
diff --git a/src/together/_utils/_proxy.py b/src/together/_utils/_proxy.py
index ffd883e9..0f239a33 100644
--- a/src/together/_utils/_proxy.py
+++ b/src/together/_utils/_proxy.py
@@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]:
@property # type: ignore
@override
def __class__(self) -> type: # pyright: ignore
- proxied = self.__get_proxied__()
+ try:
+ proxied = self.__get_proxied__()
+ except Exception:
+ return type(self)
if issubclass(type(proxied), LazyProxy):
return type(proxied)
return proxied.__class__
diff --git a/src/together/_utils/_resources_proxy.py b/src/together/_utils/_resources_proxy.py
new file mode 100644
index 00000000..68dd27e7
--- /dev/null
+++ b/src/together/_utils/_resources_proxy.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import Any
+from typing_extensions import override
+
+from ._proxy import LazyProxy
+
+
+class ResourcesProxy(LazyProxy[Any]):
+ """A proxy for the `together.resources` module.
+
+ This is used so that we can lazily import `together.resources` only when
+ needed *and* so that users can just import `together` and reference `together.resources`
+ """
+
+ @override
+ def __load__(self) -> Any:
+ import importlib
+
+ mod = importlib.import_module("together.resources")
+ return mod
+
+
+resources = ResourcesProxy().__as_proxied__()
diff --git a/src/together/_utils/_transform.py b/src/together/_utils/_transform.py
index 18afd9d8..b0cc20a7 100644
--- a/src/together/_utils/_transform.py
+++ b/src/together/_utils/_transform.py
@@ -5,13 +5,15 @@
import pathlib
from typing import Any, Mapping, TypeVar, cast
from datetime import date, datetime
-from typing_extensions import Literal, get_args, override, get_type_hints
+from typing_extensions import Literal, get_args, override, get_type_hints as _get_type_hints
import anyio
import pydantic
from ._utils import (
is_list,
+ is_given,
+ lru_cache,
is_mapping,
is_iterable,
)
@@ -108,6 +110,7 @@ class Params(TypedDict, total=False):
return cast(_T, transformed)
+@lru_cache(maxsize=8096)
def _get_annotated_type(type_: type) -> type | None:
"""If the given type is an `Annotated` type then it is returned, if not `None` is returned.
@@ -126,7 +129,7 @@ def _get_annotated_type(type_: type) -> type | None:
def _maybe_transform_key(key: str, type_: type) -> str:
"""Transform the given `data` based on the annotations provided in `type_`.
- Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata.
+ Note: this function only looks at `Annotated` types that contain `PropertyInfo` metadata.
"""
annotated_type = _get_annotated_type(type_)
if annotated_type is None:
@@ -142,6 +145,10 @@ def _maybe_transform_key(key: str, type_: type) -> str:
return key
+def _no_transform_needed(annotation: type) -> bool:
+ return annotation == float or annotation == int
+
+
def _transform_recursive(
data: object,
*,
@@ -184,6 +191,15 @@ def _transform_recursive(
return cast(object, data)
inner_type = extract_type_arg(stripped_type, 0)
+ if _no_transform_needed(inner_type):
+ # for some types there is no need to transform anything, so we can get a small
+ # perf boost from skipping that work.
+ #
+ # but we still need to convert to a list to ensure the data is json-serializable
+ if is_list(data):
+ return data
+ return list(data)
+
return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
if is_union_type(stripped_type):
@@ -245,6 +261,11 @@ def _transform_typeddict(
result: dict[str, object] = {}
annotations = get_type_hints(expected_type, include_extras=True)
for key, value in data.items():
+ if not is_given(value):
+ # we don't need to include `NotGiven` values here as they'll
+ # be stripped out before the request is sent anyway
+ continue
+
type_ = annotations.get(key)
if type_ is None:
# we do not have a type annotation for this field, leave it as is
@@ -332,6 +353,15 @@ async def _async_transform_recursive(
return cast(object, data)
inner_type = extract_type_arg(stripped_type, 0)
+ if _no_transform_needed(inner_type):
+ # for some types there is no need to transform anything, so we can get a small
+ # perf boost from skipping that work.
+ #
+ # but we still need to convert to a list to ensure the data is json-serializable
+ if is_list(data):
+ return data
+ return list(data)
+
return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
if is_union_type(stripped_type):
@@ -393,6 +423,11 @@ async def _async_transform_typeddict(
result: dict[str, object] = {}
annotations = get_type_hints(expected_type, include_extras=True)
for key, value in data.items():
+ if not is_given(value):
+ # we don't need to include `NotGiven` values here as they'll
+ # be stripped out before the request is sent anyway
+ continue
+
type_ = annotations.get(key)
if type_ is None:
# we do not have a type annotation for this field, leave it as is
@@ -400,3 +435,13 @@ async def _async_transform_typeddict(
else:
result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_)
return result
+
+
+@lru_cache(maxsize=8096)
+def get_type_hints(
+ obj: Any,
+ globalns: dict[str, Any] | None = None,
+ localns: Mapping[str, Any] | None = None,
+ include_extras: bool = False,
+) -> dict[str, Any]:
+ return _get_type_hints(obj, globalns=globalns, localns=localns, include_extras=include_extras)
diff --git a/src/together/_utils/_typing.py b/src/together/_utils/_typing.py
index 278749b1..1bac9542 100644
--- a/src/together/_utils/_typing.py
+++ b/src/together/_utils/_typing.py
@@ -13,6 +13,7 @@
get_origin,
)
+from ._utils import lru_cache
from .._types import InheritsGeneric
from .._compat import is_union as _is_union
@@ -66,6 +67,7 @@ def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]:
# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]]
+@lru_cache(maxsize=8096)
def strip_annotated_type(typ: type) -> type:
if is_required_type(typ) or is_annotated_type(typ):
return strip_annotated_type(cast(type, get_args(typ)[0]))
@@ -108,7 +110,7 @@ class MyResponse(Foo[_T]):
```
"""
cls = cast(object, get_origin(typ) or typ)
- if cls in generic_bases:
+ if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains]
# we're given the class directly
return extract_type_arg(typ, index)
diff --git a/src/together/_utils/_utils.py b/src/together/_utils/_utils.py
index e5811bba..ea3cf3f2 100644
--- a/src/together/_utils/_utils.py
+++ b/src/together/_utils/_utils.py
@@ -72,8 +72,16 @@ def _extract_items(
from .._files import assert_is_file_content
# We have exhausted the path, return the entry we found.
- assert_is_file_content(obj, key=flattened_key)
assert flattened_key is not None
+
+ if is_list(obj):
+ files: list[tuple[str, FileTypes]] = []
+ for entry in obj:
+ assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "")
+ files.append((flattened_key + "[]", cast(FileTypes, entry)))
+ return files
+
+ assert_is_file_content(obj, key=flattened_key)
return [(flattened_key, cast(FileTypes, obj))]
index += 1
diff --git a/src/together/_version.py b/src/together/_version.py
index 149ef267..2b3d6409 100644
--- a/src/together/_version.py
+++ b/src/together/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "together"
-__version__ = "0.1.0-alpha.3" # x-release-please-version
+__version__ = "0.1.0-alpha.4" # x-release-please-version
diff --git a/src/together/resources/__init__.py b/src/together/resources/__init__.py
index 14f5ae11..bd3e4c51 100644
--- a/src/together/resources/__init__.py
+++ b/src/together/resources/__init__.py
@@ -8,6 +8,14 @@
ChatResourceWithStreamingResponse,
AsyncChatResourceWithStreamingResponse,
)
+from .jobs import (
+ JobsResource,
+ AsyncJobsResource,
+ JobsResourceWithRawResponse,
+ AsyncJobsResourceWithRawResponse,
+ JobsResourceWithStreamingResponse,
+ AsyncJobsResourceWithStreamingResponse,
+)
from .audio import (
AudioResource,
AsyncAudioResource,
@@ -40,6 +48,22 @@
ModelsResourceWithStreamingResponse,
AsyncModelsResourceWithStreamingResponse,
)
+from .hardware import (
+ HardwareResource,
+ AsyncHardwareResource,
+ HardwareResourceWithRawResponse,
+ AsyncHardwareResourceWithRawResponse,
+ HardwareResourceWithStreamingResponse,
+ AsyncHardwareResourceWithStreamingResponse,
+)
+from .endpoints import (
+ EndpointsResource,
+ AsyncEndpointsResource,
+ EndpointsResourceWithRawResponse,
+ AsyncEndpointsResourceWithRawResponse,
+ EndpointsResourceWithStreamingResponse,
+ AsyncEndpointsResourceWithStreamingResponse,
+)
from .fine_tune import (
FineTuneResource,
AsyncFineTuneResource,
@@ -64,6 +88,14 @@
CompletionsResourceWithStreamingResponse,
AsyncCompletionsResourceWithStreamingResponse,
)
+from .code_interpreter import (
+ CodeInterpreterResource,
+ AsyncCodeInterpreterResource,
+ CodeInterpreterResourceWithRawResponse,
+ AsyncCodeInterpreterResourceWithRawResponse,
+ CodeInterpreterResourceWithStreamingResponse,
+ AsyncCodeInterpreterResourceWithStreamingResponse,
+)
__all__ = [
"ChatResource",
@@ -96,6 +128,12 @@
"AsyncFineTuneResourceWithRawResponse",
"FineTuneResourceWithStreamingResponse",
"AsyncFineTuneResourceWithStreamingResponse",
+ "CodeInterpreterResource",
+ "AsyncCodeInterpreterResource",
+ "CodeInterpreterResourceWithRawResponse",
+ "AsyncCodeInterpreterResourceWithRawResponse",
+ "CodeInterpreterResourceWithStreamingResponse",
+ "AsyncCodeInterpreterResourceWithStreamingResponse",
"ImagesResource",
"AsyncImagesResource",
"ImagesResourceWithRawResponse",
@@ -114,4 +152,22 @@
"AsyncModelsResourceWithRawResponse",
"ModelsResourceWithStreamingResponse",
"AsyncModelsResourceWithStreamingResponse",
+ "JobsResource",
+ "AsyncJobsResource",
+ "JobsResourceWithRawResponse",
+ "AsyncJobsResourceWithRawResponse",
+ "JobsResourceWithStreamingResponse",
+ "AsyncJobsResourceWithStreamingResponse",
+ "EndpointsResource",
+ "AsyncEndpointsResource",
+ "EndpointsResourceWithRawResponse",
+ "AsyncEndpointsResourceWithRawResponse",
+ "EndpointsResourceWithStreamingResponse",
+ "AsyncEndpointsResourceWithStreamingResponse",
+ "HardwareResource",
+ "AsyncHardwareResource",
+ "HardwareResourceWithRawResponse",
+ "AsyncHardwareResourceWithRawResponse",
+ "HardwareResourceWithStreamingResponse",
+ "AsyncHardwareResourceWithStreamingResponse",
]
diff --git a/src/together/resources/audio.py b/src/together/resources/audio.py
index c21fc754..7a8b2455 100644
--- a/src/together/resources/audio.py
+++ b/src/together/resources/audio.py
@@ -9,10 +9,7 @@
from ..types import audio_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py
index be27ee86..9b9b413a 100644
--- a/src/together/resources/chat/completions.py
+++ b/src/together/resources/chat/completions.py
@@ -8,11 +8,7 @@
import httpx
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import required_args, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -515,7 +511,9 @@ def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -1010,7 +1008,9 @@ async def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
diff --git a/src/together/resources/code_interpreter/__init__.py b/src/together/resources/code_interpreter/__init__.py
new file mode 100644
index 00000000..d5e30afe
--- /dev/null
+++ b/src/together/resources/code_interpreter/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .sessions import (
+ SessionsResource,
+ AsyncSessionsResource,
+ SessionsResourceWithRawResponse,
+ AsyncSessionsResourceWithRawResponse,
+ SessionsResourceWithStreamingResponse,
+ AsyncSessionsResourceWithStreamingResponse,
+)
+from .code_interpreter import (
+ CodeInterpreterResource,
+ AsyncCodeInterpreterResource,
+ CodeInterpreterResourceWithRawResponse,
+ AsyncCodeInterpreterResourceWithRawResponse,
+ CodeInterpreterResourceWithStreamingResponse,
+ AsyncCodeInterpreterResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "SessionsResource",
+ "AsyncSessionsResource",
+ "SessionsResourceWithRawResponse",
+ "AsyncSessionsResourceWithRawResponse",
+ "SessionsResourceWithStreamingResponse",
+ "AsyncSessionsResourceWithStreamingResponse",
+ "CodeInterpreterResource",
+ "AsyncCodeInterpreterResource",
+ "CodeInterpreterResourceWithRawResponse",
+ "AsyncCodeInterpreterResourceWithRawResponse",
+ "CodeInterpreterResourceWithStreamingResponse",
+ "AsyncCodeInterpreterResourceWithStreamingResponse",
+]
diff --git a/src/together/resources/code_interpreter/code_interpreter.py b/src/together/resources/code_interpreter/code_interpreter.py
new file mode 100644
index 00000000..8a9b777d
--- /dev/null
+++ b/src/together/resources/code_interpreter/code_interpreter.py
@@ -0,0 +1,258 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Any, Iterable, cast
+from typing_extensions import Literal
+
+import httpx
+
+from ...types import code_interpreter_execute_params
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform, async_maybe_transform
+from .sessions import (
+ SessionsResource,
+ AsyncSessionsResource,
+ SessionsResourceWithRawResponse,
+ AsyncSessionsResourceWithRawResponse,
+ SessionsResourceWithStreamingResponse,
+ AsyncSessionsResourceWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.execute_response import ExecuteResponse
+
+__all__ = ["CodeInterpreterResource", "AsyncCodeInterpreterResource"]
+
+
+class CodeInterpreterResource(SyncAPIResource):
+ @cached_property
+ def sessions(self) -> SessionsResource:
+ return SessionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> CodeInterpreterResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return CodeInterpreterResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CodeInterpreterResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return CodeInterpreterResourceWithStreamingResponse(self)
+
+ def execute(
+ self,
+ *,
+ code: str,
+ language: Literal["python"],
+ files: Iterable[code_interpreter_execute_params.File] | NotGiven = NOT_GIVEN,
+ session_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExecuteResponse:
+ """Executes the given code snippet and returns the output.
+
+ Without a session_id, a
+ new session will be created to run the code. If you do pass in a valid
+ session_id, the code will be run in that session. This is useful for running
+ multiple code snippets in the same environment, because dependencies and similar
+ things are persisted between calls to the same session.
+
+ Args:
+ code: Code snippet to execute.
+
+ language: Programming language for the code to execute. Currently only supports Python,
+ but more will be added.
+
+ files: Files to upload to the session. If present, files will be uploaded before
+ executing the given code.
+
+ session_id: Identifier of the current session. Used to make follow-up calls. Requests will
+ return an error if the session does not belong to the caller or has expired.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return cast(
+ ExecuteResponse,
+ self._post(
+ "/tci/execute",
+ body=maybe_transform(
+ {
+ "code": code,
+ "language": language,
+ "files": files,
+ "session_id": session_id,
+ },
+ code_interpreter_execute_params.CodeInterpreterExecuteParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=cast(Any, ExecuteResponse), # Union types cannot be passed in as arguments in the type system
+ ),
+ )
+
+
+class AsyncCodeInterpreterResource(AsyncAPIResource):
+ @cached_property
+ def sessions(self) -> AsyncSessionsResource:
+ return AsyncSessionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncCodeInterpreterResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncCodeInterpreterResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCodeInterpreterResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncCodeInterpreterResourceWithStreamingResponse(self)
+
+ async def execute(
+ self,
+ *,
+ code: str,
+ language: Literal["python"],
+ files: Iterable[code_interpreter_execute_params.File] | NotGiven = NOT_GIVEN,
+ session_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ExecuteResponse:
+ """Executes the given code snippet and returns the output.
+
+ Without a session_id, a
+ new session will be created to run the code. If you do pass in a valid
+ session_id, the code will be run in that session. This is useful for running
+ multiple code snippets in the same environment, because dependencies and similar
+ things are persisted between calls to the same session.
+
+ Args:
+ code: Code snippet to execute.
+
+ language: Programming language for the code to execute. Currently only supports Python,
+ but more will be added.
+
+ files: Files to upload to the session. If present, files will be uploaded before
+ executing the given code.
+
+ session_id: Identifier of the current session. Used to make follow-up calls. Requests will
+ return an error if the session does not belong to the caller or has expired.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return cast(
+ ExecuteResponse,
+ await self._post(
+ "/tci/execute",
+ body=await async_maybe_transform(
+ {
+ "code": code,
+ "language": language,
+ "files": files,
+ "session_id": session_id,
+ },
+ code_interpreter_execute_params.CodeInterpreterExecuteParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=cast(Any, ExecuteResponse), # Union types cannot be passed in as arguments in the type system
+ ),
+ )
+
+
+class CodeInterpreterResourceWithRawResponse:
+ def __init__(self, code_interpreter: CodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = to_raw_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> SessionsResourceWithRawResponse:
+ return SessionsResourceWithRawResponse(self._code_interpreter.sessions)
+
+
+class AsyncCodeInterpreterResourceWithRawResponse:
+ def __init__(self, code_interpreter: AsyncCodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = async_to_raw_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsResourceWithRawResponse:
+ return AsyncSessionsResourceWithRawResponse(self._code_interpreter.sessions)
+
+
+class CodeInterpreterResourceWithStreamingResponse:
+ def __init__(self, code_interpreter: CodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = to_streamed_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> SessionsResourceWithStreamingResponse:
+ return SessionsResourceWithStreamingResponse(self._code_interpreter.sessions)
+
+
+class AsyncCodeInterpreterResourceWithStreamingResponse:
+ def __init__(self, code_interpreter: AsyncCodeInterpreterResource) -> None:
+ self._code_interpreter = code_interpreter
+
+ self.execute = async_to_streamed_response_wrapper(
+ code_interpreter.execute,
+ )
+
+ @cached_property
+ def sessions(self) -> AsyncSessionsResourceWithStreamingResponse:
+ return AsyncSessionsResourceWithStreamingResponse(self._code_interpreter.sessions)
diff --git a/src/together/resources/code_interpreter/sessions.py b/src/together/resources/code_interpreter/sessions.py
new file mode 100644
index 00000000..c4f3a8b0
--- /dev/null
+++ b/src/together/resources/code_interpreter/sessions.py
@@ -0,0 +1,135 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._base_client import make_request_options
+from ...types.code_interpreter.session_list_response import SessionListResponse
+
+__all__ = ["SessionsResource", "AsyncSessionsResource"]
+
+
+class SessionsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SessionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return SessionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SessionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return SessionsResourceWithStreamingResponse(self)
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionListResponse:
+ """Lists all your currently active sessions."""
+ return self._get(
+ "/tci/sessions",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionListResponse,
+ )
+
+
+class AsyncSessionsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSessionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSessionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSessionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncSessionsResourceWithStreamingResponse(self)
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SessionListResponse:
+ """Lists all your currently active sessions."""
+ return await self._get(
+ "/tci/sessions",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionListResponse,
+ )
+
+
+class SessionsResourceWithRawResponse:
+ def __init__(self, sessions: SessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = to_raw_response_wrapper(
+ sessions.list,
+ )
+
+
+class AsyncSessionsResourceWithRawResponse:
+ def __init__(self, sessions: AsyncSessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = async_to_raw_response_wrapper(
+ sessions.list,
+ )
+
+
+class SessionsResourceWithStreamingResponse:
+ def __init__(self, sessions: SessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = to_streamed_response_wrapper(
+ sessions.list,
+ )
+
+
+class AsyncSessionsResourceWithStreamingResponse:
+ def __init__(self, sessions: AsyncSessionsResource) -> None:
+ self._sessions = sessions
+
+ self.list = async_to_streamed_response_wrapper(
+ sessions.list,
+ )
diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py
index 9f1a0ba0..d201dcd2 100644
--- a/src/together/resources/completions.py
+++ b/src/together/resources/completions.py
@@ -9,11 +9,7 @@
from ..types import completion_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import required_args, maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -442,7 +438,9 @@ def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -866,7 +864,9 @@ async def create(
"top_k": top_k,
"top_p": top_p,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py
index 936cfbc1..36688cb3 100644
--- a/src/together/resources/embeddings.py
+++ b/src/together/resources/embeddings.py
@@ -9,10 +9,7 @@
from ..types import embedding_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py
new file mode 100644
index 00000000..933c1fc5
--- /dev/null
+++ b/src/together/resources/endpoints.py
@@ -0,0 +1,627 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.endpoint_list_response import EndpointListResponse
+from ..types.endpoint_create_response import EndpointCreateResponse
+from ..types.endpoint_update_response import EndpointUpdateResponse
+from ..types.endpoint_retrieve_response import EndpointRetrieveResponse
+
+__all__ = ["EndpointsResource", "AsyncEndpointsResource"]
+
+
+class EndpointsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> EndpointsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return EndpointsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return EndpointsResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ autoscaling: endpoint_create_params.Autoscaling,
+ hardware: str,
+ model: str,
+ disable_prompt_cache: bool | NotGiven = NOT_GIVEN,
+ disable_speculative_decoding: bool | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointCreateResponse:
+ """Creates a new dedicated endpoint for serving models.
+
+ The endpoint will
+ automatically start after creation. You can deploy any supported model on
+ hardware configurations that meet the model's requirements.
+
+ Args:
+ autoscaling: Configuration for automatic scaling of the endpoint
+
+ hardware: The hardware configuration to use for this endpoint
+
+ model: The model to deploy on this endpoint
+
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
+
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
+ timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/endpoints",
+ body=maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "hardware": hardware,
+ "model": model,
+ "disable_prompt_cache": disable_prompt_cache,
+ "disable_speculative_decoding": disable_speculative_decoding,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_create_params.EndpointCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointRetrieveResponse:
+ """
+ Retrieves details about a specific endpoint, including its current state,
+ configuration, and scaling settings.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return self._get(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointRetrieveResponse,
+ )
+
+ def update(
+ self,
+ endpoint_id: str,
+ *,
+ autoscaling: endpoint_update_params.Autoscaling | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointUpdateResponse:
+ """Updates an existing endpoint's configuration.
+
+ You can modify the display name,
+ autoscaling settings, or change the endpoint's state (start/stop).
+
+ Args:
+ autoscaling: New autoscaling configuration for the endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to null or 0 to disable automatic timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return self._patch(
+ f"/endpoints/{endpoint_id}",
+ body=maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_update_params.EndpointUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointUpdateResponse,
+ )
+
+ def list(
+ self,
+ *,
+ type: Literal["dedicated", "serverless"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointListResponse:
+ """Returns a list of all endpoints associated with your account.
+
+ You can filter the
+ results by type (dedicated or serverless).
+
+ Args:
+ type: Filter endpoints by type
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/endpoints",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"type": type}, endpoint_list_params.EndpointListParams),
+ ),
+ cast_to=EndpointListResponse,
+ )
+
+ def delete(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """Permanently deletes an endpoint.
+
+ This action cannot be undone.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._delete(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class AsyncEndpointsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncEndpointsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncEndpointsResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ autoscaling: endpoint_create_params.Autoscaling,
+ hardware: str,
+ model: str,
+ disable_prompt_cache: bool | NotGiven = NOT_GIVEN,
+ disable_speculative_decoding: bool | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointCreateResponse:
+ """Creates a new dedicated endpoint for serving models.
+
+ The endpoint will
+ automatically start after creation. You can deploy any supported model on
+ hardware configurations that meet the model's requirements.
+
+ Args:
+ autoscaling: Configuration for automatic scaling of the endpoint
+
+ hardware: The hardware configuration to use for this endpoint
+
+ model: The model to deploy on this endpoint
+
+ disable_prompt_cache: Whether to disable the prompt cache for this endpoint
+
+ disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
+ timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/endpoints",
+ body=await async_maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "hardware": hardware,
+ "model": model,
+ "disable_prompt_cache": disable_prompt_cache,
+ "disable_speculative_decoding": disable_speculative_decoding,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_create_params.EndpointCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointRetrieveResponse:
+ """
+ Retrieves details about a specific endpoint, including its current state,
+ configuration, and scaling settings.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return await self._get(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointRetrieveResponse,
+ )
+
+ async def update(
+ self,
+ endpoint_id: str,
+ *,
+ autoscaling: endpoint_update_params.Autoscaling | NotGiven = NOT_GIVEN,
+ display_name: str | NotGiven = NOT_GIVEN,
+ inactive_timeout: Optional[int] | NotGiven = NOT_GIVEN,
+ state: Literal["STARTED", "STOPPED"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointUpdateResponse:
+ """Updates an existing endpoint's configuration.
+
+ You can modify the display name,
+ autoscaling settings, or change the endpoint's state (start/stop).
+
+ Args:
+ autoscaling: New autoscaling configuration for the endpoint
+
+ display_name: A human-readable name for the endpoint
+
+ inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to null or 0 to disable automatic timeout.
+
+ state: The desired state of the endpoint
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ return await self._patch(
+ f"/endpoints/{endpoint_id}",
+ body=await async_maybe_transform(
+ {
+ "autoscaling": autoscaling,
+ "display_name": display_name,
+ "inactive_timeout": inactive_timeout,
+ "state": state,
+ },
+ endpoint_update_params.EndpointUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EndpointUpdateResponse,
+ )
+
+ async def list(
+ self,
+ *,
+ type: Literal["dedicated", "serverless"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EndpointListResponse:
+ """Returns a list of all endpoints associated with your account.
+
+ You can filter the
+ results by type (dedicated or serverless).
+
+ Args:
+ type: Filter endpoints by type
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._get(
+ "/endpoints",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"type": type}, endpoint_list_params.EndpointListParams),
+ ),
+ cast_to=EndpointListResponse,
+ )
+
+ async def delete(
+ self,
+ endpoint_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> None:
+ """Permanently deletes an endpoint.
+
+ This action cannot be undone.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not endpoint_id:
+ raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._delete(
+ f"/endpoints/{endpoint_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class EndpointsResourceWithRawResponse:
+ def __init__(self, endpoints: EndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = to_raw_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = to_raw_response_wrapper(
+ endpoints.update,
+ )
+ self.list = to_raw_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = to_raw_response_wrapper(
+ endpoints.delete,
+ )
+
+
+class AsyncEndpointsResourceWithRawResponse:
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = async_to_raw_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = async_to_raw_response_wrapper(
+ endpoints.update,
+ )
+ self.list = async_to_raw_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = async_to_raw_response_wrapper(
+ endpoints.delete,
+ )
+
+
+class EndpointsResourceWithStreamingResponse:
+ def __init__(self, endpoints: EndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = to_streamed_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ endpoints.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ endpoints.delete,
+ )
+
+
+class AsyncEndpointsResourceWithStreamingResponse:
+ def __init__(self, endpoints: AsyncEndpointsResource) -> None:
+ self._endpoints = endpoints
+
+ self.create = async_to_streamed_response_wrapper(
+ endpoints.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ endpoints.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ endpoints.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ endpoints.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ endpoints.delete,
+ )
diff --git a/src/together/resources/fine_tune.py b/src/together/resources/fine_tune.py
index b3b62585..84619f78 100644
--- a/src/together/resources/fine_tune.py
+++ b/src/together/resources/fine_tune.py
@@ -9,10 +9,7 @@
from ..types import fine_tune_create_params, fine_tune_download_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -55,7 +52,8 @@ def create(
*,
model: str,
training_file: str,
- batch_size: int | NotGiven = NOT_GIVEN,
+ batch_size: Union[int, Literal["max"]] | NotGiven = NOT_GIVEN,
+ from_checkpoint: str | NotGiven = NOT_GIVEN,
learning_rate: float | NotGiven = NOT_GIVEN,
lr_scheduler: fine_tune_create_params.LrScheduler | NotGiven = NOT_GIVEN,
max_grad_norm: float | NotGiven = NOT_GIVEN,
@@ -64,6 +62,7 @@ def create(
n_evals: int | NotGiven = NOT_GIVEN,
suffix: str | NotGiven = NOT_GIVEN,
train_on_inputs: Union[bool, Literal["auto"]] | NotGiven = NOT_GIVEN,
+ training_method: fine_tune_create_params.TrainingMethod | NotGiven = NOT_GIVEN,
training_type: fine_tune_create_params.TrainingType | NotGiven = NOT_GIVEN,
validation_file: str | NotGiven = NOT_GIVEN,
wandb_api_key: str | NotGiven = NOT_GIVEN,
@@ -88,11 +87,20 @@ def create(
training_file: File-ID of a training file uploaded to the Together API
batch_size: Number of training examples processed together (larger batches use more memory
- but may train faster)
+ but may train faster). Defaults to "max". We use training optimizations like
+ packing, so the effective batch size may be different than the value you set.
+
+ from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job.
+ Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
+ `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the
+ final checkpoint will be used.
learning_rate: Controls how quickly the model adapts to new information (too high may cause
instability, too low may slow convergence)
+ lr_scheduler: The learning rate scheduler to use. It specifies how the learning rate is
+ adjusted during training.
+
max_grad_norm: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
n_checkpoints: Number of intermediate model versions saved during training for evaluation
@@ -107,6 +115,9 @@ def create(
train_on_inputs: Whether to mask the user messages in conversational data or prompts in
instruction data.
+ training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct
+ Preference Optimization.
+
validation_file: File-ID of a validation file uploaded to the Together API
wandb_api_key: Integration key for tracking experiments and model metrics on W&B platform
@@ -121,7 +132,7 @@ def create(
warmup_ratio: The percent of steps at the start of training to linearly increase the learning
rate.
- weight_decay: Weight decay
+ weight_decay: Weight decay. Regularization parameter for the optimizer.
extra_headers: Send extra headers
@@ -138,6 +149,7 @@ def create(
"model": model,
"training_file": training_file,
"batch_size": batch_size,
+ "from_checkpoint": from_checkpoint,
"learning_rate": learning_rate,
"lr_scheduler": lr_scheduler,
"max_grad_norm": max_grad_norm,
@@ -146,6 +158,7 @@ def create(
"n_evals": n_evals,
"suffix": suffix,
"train_on_inputs": train_on_inputs,
+ "training_method": training_method,
"training_type": training_type,
"validation_file": validation_file,
"wandb_api_key": wandb_api_key,
@@ -364,7 +377,8 @@ async def create(
*,
model: str,
training_file: str,
- batch_size: int | NotGiven = NOT_GIVEN,
+ batch_size: Union[int, Literal["max"]] | NotGiven = NOT_GIVEN,
+ from_checkpoint: str | NotGiven = NOT_GIVEN,
learning_rate: float | NotGiven = NOT_GIVEN,
lr_scheduler: fine_tune_create_params.LrScheduler | NotGiven = NOT_GIVEN,
max_grad_norm: float | NotGiven = NOT_GIVEN,
@@ -373,6 +387,7 @@ async def create(
n_evals: int | NotGiven = NOT_GIVEN,
suffix: str | NotGiven = NOT_GIVEN,
train_on_inputs: Union[bool, Literal["auto"]] | NotGiven = NOT_GIVEN,
+ training_method: fine_tune_create_params.TrainingMethod | NotGiven = NOT_GIVEN,
training_type: fine_tune_create_params.TrainingType | NotGiven = NOT_GIVEN,
validation_file: str | NotGiven = NOT_GIVEN,
wandb_api_key: str | NotGiven = NOT_GIVEN,
@@ -397,11 +412,20 @@ async def create(
training_file: File-ID of a training file uploaded to the Together API
batch_size: Number of training examples processed together (larger batches use more memory
- but may train faster)
+ but may train faster). Defaults to "max". We use training optimizations like
+ packing, so the effective batch size may be different from the value you set.
+
+ from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job.
+ Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
+ `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the
+ final checkpoint will be used.
learning_rate: Controls how quickly the model adapts to new information (too high may cause
instability, too low may slow convergence)
+ lr_scheduler: The learning rate scheduler to use. It specifies how the learning rate is
+ adjusted during training.
+
max_grad_norm: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
n_checkpoints: Number of intermediate model versions saved during training for evaluation
@@ -416,6 +440,9 @@ async def create(
train_on_inputs: Whether to mask the user messages in conversational data or prompts in
instruction data.
+ training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct
+ Preference Optimization.
+
validation_file: File-ID of a validation file uploaded to the Together API
wandb_api_key: Integration key for tracking experiments and model metrics on W&B platform
@@ -430,7 +457,7 @@ async def create(
warmup_ratio: The percent of steps at the start of training to linearly increase the learning
rate.
- weight_decay: Weight decay
+ weight_decay: Weight decay. Regularization parameter for the optimizer.
extra_headers: Send extra headers
@@ -447,6 +474,7 @@ async def create(
"model": model,
"training_file": training_file,
"batch_size": batch_size,
+ "from_checkpoint": from_checkpoint,
"learning_rate": learning_rate,
"lr_scheduler": lr_scheduler,
"max_grad_norm": max_grad_norm,
@@ -455,6 +483,7 @@ async def create(
"n_evals": n_evals,
"suffix": suffix,
"train_on_inputs": train_on_inputs,
+ "training_method": training_method,
"training_type": training_type,
"validation_file": validation_file,
"wandb_api_key": wandb_api_key,
diff --git a/src/together/resources/hardware.py b/src/together/resources/hardware.py
new file mode 100644
index 00000000..302e6303
--- /dev/null
+++ b/src/together/resources/hardware.py
@@ -0,0 +1,181 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ..types import hardware_list_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.hardware_list_response import HardwareListResponse
+
+__all__ = ["HardwareResource", "AsyncHardwareResource"]
+
+
+class HardwareResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> HardwareResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return HardwareResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> HardwareResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return HardwareResourceWithStreamingResponse(self)
+
+ def list(
+ self,
+ *,
+ model: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> HardwareListResponse:
+ """Returns a list of available hardware configurations for deploying models.
+
+ When a
+ model parameter is provided, it returns only hardware configurations compatible
+ with that model, including their current availability status.
+
+ Args:
+ model: Filter hardware configurations by model compatibility. When provided, the
+ response includes availability status for each compatible configuration.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/hardware",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"model": model}, hardware_list_params.HardwareListParams),
+ ),
+ cast_to=HardwareListResponse,
+ )
+
+
+class AsyncHardwareResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncHardwareResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncHardwareResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncHardwareResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncHardwareResourceWithStreamingResponse(self)
+
+ async def list(
+ self,
+ *,
+ model: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> HardwareListResponse:
+ """Returns a list of available hardware configurations for deploying models.
+
+ When a
+ model parameter is provided, it returns only hardware configurations compatible
+ with that model, including their current availability status.
+
+ Args:
+ model: Filter hardware configurations by model compatibility. When provided, the
+ response includes availability status for each compatible configuration.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._get(
+ "/hardware",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"model": model}, hardware_list_params.HardwareListParams),
+ ),
+ cast_to=HardwareListResponse,
+ )
+
+
+class HardwareResourceWithRawResponse:
+ def __init__(self, hardware: HardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = to_raw_response_wrapper(
+ hardware.list,
+ )
+
+
+class AsyncHardwareResourceWithRawResponse:
+ def __init__(self, hardware: AsyncHardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = async_to_raw_response_wrapper(
+ hardware.list,
+ )
+
+
+class HardwareResourceWithStreamingResponse:
+ def __init__(self, hardware: HardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = to_streamed_response_wrapper(
+ hardware.list,
+ )
+
+
+class AsyncHardwareResourceWithStreamingResponse:
+ def __init__(self, hardware: AsyncHardwareResource) -> None:
+ self._hardware = hardware
+
+ self.list = async_to_streamed_response_wrapper(
+ hardware.list,
+ )
diff --git a/src/together/resources/images.py b/src/together/resources/images.py
index c92542df..30526e1d 100644
--- a/src/together/resources/images.py
+++ b/src/together/resources/images.py
@@ -9,10 +9,7 @@
from ..types import image_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/together/resources/jobs.py b/src/together/resources/jobs.py
new file mode 100644
index 00000000..2eae45ab
--- /dev/null
+++ b/src/together/resources/jobs.py
@@ -0,0 +1,214 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.job_list_response import JobListResponse
+from ..types.job_retrieve_response import JobRetrieveResponse
+
+__all__ = ["JobsResource", "AsyncJobsResource"]
+
+
+class JobsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> JobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return JobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> JobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return JobsResourceWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ job_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobRetrieveResponse:
+ """
+ Get the status of a specific job
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return self._get(
+ f"/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobRetrieveResponse,
+ )
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobListResponse:
+ """List all jobs and their statuses"""
+ return self._get(
+ "/jobs",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobListResponse,
+ )
+
+
+class AsyncJobsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncJobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+ """
+ return AsyncJobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
+ """
+ return AsyncJobsResourceWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ job_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobRetrieveResponse:
+ """
+ Get the status of a specific job
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return await self._get(
+ f"/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobRetrieveResponse,
+ )
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> JobListResponse:
+ """List all jobs and their statuses"""
+ return await self._get(
+ "/jobs",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=JobListResponse,
+ )
+
+
+class JobsResourceWithRawResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ jobs.list,
+ )
+
+
+class AsyncJobsResourceWithRawResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = async_to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ jobs.list,
+ )
+
+
+class JobsResourceWithStreamingResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ jobs.list,
+ )
+
+
+class AsyncJobsResourceWithStreamingResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ jobs.list,
+ )
diff --git a/src/together/resources/models.py b/src/together/resources/models.py
index 1432e2c0..b4a9cc73 100644
--- a/src/together/resources/models.py
+++ b/src/together/resources/models.py
@@ -2,9 +2,13 @@
from __future__ import annotations
+from typing_extensions import Literal
+
import httpx
+from ..types import model_upload_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -15,6 +19,7 @@
)
from .._base_client import make_request_options
from ..types.model_list_response import ModelListResponse
+from ..types.model_upload_response import ModelUploadResponse
__all__ = ["ModelsResource", "AsyncModelsResource"]
@@ -58,6 +63,71 @@ def list(
cast_to=ModelListResponse,
)
+ def upload(
+ self,
+ *,
+ model_name: str,
+ model_source: str,
+ base_model: str | NotGiven = NOT_GIVEN,
+ description: str | NotGiven = NOT_GIVEN,
+ hf_token: str | NotGiven = NOT_GIVEN,
+ lora_model: str | NotGiven = NOT_GIVEN,
+ model_type: Literal["model", "adapter"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ModelUploadResponse:
+ """
+ Upload a custom model or adapter from Hugging Face or S3
+
+ Args:
+ model_name: The name to give to your uploaded model
+
+ model_source: The source location of the model (Hugging Face repo or S3 path)
+
+ base_model: The base model to use for an adapter if setting it to run against a serverless
+ pool. Only used for model_type `adapter`.
+
+ description: A description of your model
+
+ hf_token: Hugging Face token (if uploading from Hugging Face)
+
+ lora_model: The lora pool to use for an adapter if setting it to run against, say, a
+ dedicated pool. Only used for model_type `adapter`.
+
+ model_type: Whether the model is a full model or an adapter
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/models",
+ body=maybe_transform(
+ {
+ "model_name": model_name,
+ "model_source": model_source,
+ "base_model": base_model,
+ "description": description,
+ "hf_token": hf_token,
+ "lora_model": lora_model,
+ "model_type": model_type,
+ },
+ model_upload_params.ModelUploadParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelUploadResponse,
+ )
+
class AsyncModelsResource(AsyncAPIResource):
@cached_property
@@ -98,6 +168,71 @@ async def list(
cast_to=ModelListResponse,
)
+ async def upload(
+ self,
+ *,
+ model_name: str,
+ model_source: str,
+ base_model: str | NotGiven = NOT_GIVEN,
+ description: str | NotGiven = NOT_GIVEN,
+ hf_token: str | NotGiven = NOT_GIVEN,
+ lora_model: str | NotGiven = NOT_GIVEN,
+ model_type: Literal["model", "adapter"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> ModelUploadResponse:
+ """
+ Upload a custom model or adapter from Hugging Face or S3
+
+ Args:
+ model_name: The name to give to your uploaded model
+
+ model_source: The source location of the model (Hugging Face repo or S3 path)
+
+ base_model: The base model to use for an adapter if setting it to run against a serverless
+ pool. Only used for model_type `adapter`.
+
+ description: A description of your model
+
+ hf_token: Hugging Face token (if uploading from Hugging Face)
+
+ lora_model: The lora pool to use for an adapter if setting it to run against, say, a
+ dedicated pool. Only used for model_type `adapter`.
+
+ model_type: Whether the model is a full model or an adapter
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/models",
+ body=await async_maybe_transform(
+ {
+ "model_name": model_name,
+ "model_source": model_source,
+ "base_model": base_model,
+ "description": description,
+ "hf_token": hf_token,
+ "lora_model": lora_model,
+ "model_type": model_type,
+ },
+ model_upload_params.ModelUploadParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=ModelUploadResponse,
+ )
+
class ModelsResourceWithRawResponse:
def __init__(self, models: ModelsResource) -> None:
@@ -106,6 +241,9 @@ def __init__(self, models: ModelsResource) -> None:
self.list = to_raw_response_wrapper(
models.list,
)
+ self.upload = to_raw_response_wrapper(
+ models.upload,
+ )
class AsyncModelsResourceWithRawResponse:
@@ -115,6 +253,9 @@ def __init__(self, models: AsyncModelsResource) -> None:
self.list = async_to_raw_response_wrapper(
models.list,
)
+ self.upload = async_to_raw_response_wrapper(
+ models.upload,
+ )
class ModelsResourceWithStreamingResponse:
@@ -124,6 +265,9 @@ def __init__(self, models: ModelsResource) -> None:
self.list = to_streamed_response_wrapper(
models.list,
)
+ self.upload = to_streamed_response_wrapper(
+ models.upload,
+ )
class AsyncModelsResourceWithStreamingResponse:
@@ -133,3 +277,6 @@ def __init__(self, models: AsyncModelsResource) -> None:
self.list = async_to_streamed_response_wrapper(
models.list,
)
+ self.upload = async_to_streamed_response_wrapper(
+ models.upload,
+ )
diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py
index 5d067f82..d3096869 100644
--- a/src/together/types/__init__.py
+++ b/src/together/types/__init__.py
@@ -11,17 +11,32 @@
from .tools_param import ToolsParam as ToolsParam
from .fine_tune_event import FineTuneEvent as FineTuneEvent
from .rerank_response import RerankResponse as RerankResponse
+from .execute_response import ExecuteResponse as ExecuteResponse
+from .job_list_response import JobListResponse as JobListResponse
from .tool_choice_param import ToolChoiceParam as ToolChoiceParam
from .file_list_response import FileListResponse as FileListResponse
from .audio_create_params import AudioCreateParams as AudioCreateParams
from .image_create_params import ImageCreateParams as ImageCreateParams
from .model_list_response import ModelListResponse as ModelListResponse
+from .model_upload_params import ModelUploadParams as ModelUploadParams
from .client_rerank_params import ClientRerankParams as ClientRerankParams
+from .endpoint_list_params import EndpointListParams as EndpointListParams
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
+from .hardware_list_params import HardwareListParams as HardwareListParams
+from .job_retrieve_response import JobRetrieveResponse as JobRetrieveResponse
+from .model_upload_response import ModelUploadResponse as ModelUploadResponse
+from .endpoint_create_params import EndpointCreateParams as EndpointCreateParams
+from .endpoint_list_response import EndpointListResponse as EndpointListResponse
+from .endpoint_update_params import EndpointUpdateParams as EndpointUpdateParams
from .file_retrieve_response import FileRetrieveResponse as FileRetrieveResponse
+from .hardware_list_response import HardwareListResponse as HardwareListResponse
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
from .fine_tune_create_params import FineTuneCreateParams as FineTuneCreateParams
from .fine_tune_list_response import FineTuneListResponse as FineTuneListResponse
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
+from .endpoint_create_response import EndpointCreateResponse as EndpointCreateResponse
+from .endpoint_update_response import EndpointUpdateResponse as EndpointUpdateResponse
from .fine_tune_download_params import FineTuneDownloadParams as FineTuneDownloadParams
+from .endpoint_retrieve_response import EndpointRetrieveResponse as EndpointRetrieveResponse
from .fine_tune_download_response import FineTuneDownloadResponse as FineTuneDownloadResponse
+from .code_interpreter_execute_params import CodeInterpreterExecuteParams as CodeInterpreterExecuteParams
diff --git a/src/together/types/chat/chat_completion_structured_message_image_url_param.py b/src/together/types/chat/chat_completion_structured_message_image_url_param.py
index 1a1f5bfc..25d737ca 100644
--- a/src/together/types/chat/chat_completion_structured_message_image_url_param.py
+++ b/src/together/types/chat/chat_completion_structured_message_image_url_param.py
@@ -9,10 +9,10 @@
class ImageURL(TypedDict, total=False):
url: Required[str]
- """The URL of the image as a plain string."""
+ """The URL of the image"""
class ChatCompletionStructuredMessageImageURLParam(TypedDict, total=False):
- image_url: Required[ImageURL]
+ image_url: ImageURL
- type: Required[Literal["image_url"]]
+ type: Literal["image_url"]
diff --git a/src/together/types/chat/chat_completion_usage.py b/src/together/types/chat/chat_completion_usage.py
index 82b9d450..510233f9 100644
--- a/src/together/types/chat/chat_completion_usage.py
+++ b/src/together/types/chat/chat_completion_usage.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..._models import BaseModel
__all__ = ["ChatCompletionUsage"]
diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py
index d6749ac7..be7d65c0 100644
--- a/src/together/types/chat/completion_create_params.py
+++ b/src/together/types/chat/completion_create_params.py
@@ -14,6 +14,8 @@
"CompletionCreateParamsBase",
"Message",
"MessageContentUnionMember1",
+ "MessageContentUnionMember1Video",
+ "MessageContentUnionMember1VideoVideoURL",
"FunctionCall",
"FunctionCallName",
"ResponseFormat",
@@ -157,8 +159,21 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
+class MessageContentUnionMember1VideoVideoURL(TypedDict, total=False):
+ url: Required[str]
+ """The URL of the video"""
+
+
+class MessageContentUnionMember1Video(TypedDict, total=False):
+ type: Required[Literal["video_url"]]
+
+ video_url: Required[MessageContentUnionMember1VideoVideoURL]
+
+
MessageContentUnionMember1: TypeAlias = Union[
- ChatCompletionStructuredMessageTextParam, ChatCompletionStructuredMessageImageURLParam
+ ChatCompletionStructuredMessageTextParam,
+ ChatCompletionStructuredMessageImageURLParam,
+ MessageContentUnionMember1Video,
]
@@ -170,7 +185,10 @@ class Message(TypedDict, total=False):
"""
role: Required[Literal["system", "user", "assistant", "tool"]]
- """The role of the messages author. Choice between: system, user, or assistant."""
+ """The role of the message's author.
+
+ Choice between: system, user, assistant, or tool.
+ """
class FunctionCallName(TypedDict, total=False):
@@ -181,7 +199,7 @@ class FunctionCallName(TypedDict, total=False):
class ResponseFormat(TypedDict, total=False):
- schema: Dict[str, str]
+ schema: Dict[str, object]
"""The schema of the response format."""
type: str
diff --git a/src/together/types/code_interpreter/__init__.py b/src/together/types/code_interpreter/__init__.py
new file mode 100644
index 00000000..82331854
--- /dev/null
+++ b/src/together/types/code_interpreter/__init__.py
@@ -0,0 +1,5 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .session_list_response import SessionListResponse as SessionListResponse
diff --git a/src/together/types/code_interpreter/session_list_response.py b/src/together/types/code_interpreter/session_list_response.py
new file mode 100644
index 00000000..f4379c0d
--- /dev/null
+++ b/src/together/types/code_interpreter/session_list_response.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from datetime import datetime
+
+from ..._models import BaseModel
+
+__all__ = ["SessionListResponse", "Data", "DataSession"]
+
+
+class DataSession(BaseModel):
+ id: str
+ """Session Identifier. Used to make follow-up calls."""
+
+ execute_count: int
+
+ expires_at: datetime
+
+ last_execute_at: datetime
+
+ started_at: datetime
+
+
+class Data(BaseModel):
+ sessions: List[DataSession]
+
+
+class SessionListResponse(BaseModel):
+ data: Optional[Data] = None
+
+ errors: Optional[List[Union[str, Dict[str, object]]]] = None
diff --git a/src/together/types/code_interpreter_execute_params.py b/src/together/types/code_interpreter_execute_params.py
new file mode 100644
index 00000000..91cf6c02
--- /dev/null
+++ b/src/together/types/code_interpreter_execute_params.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CodeInterpreterExecuteParams", "File"]
+
+
+class CodeInterpreterExecuteParams(TypedDict, total=False):
+ code: Required[str]
+ """Code snippet to execute."""
+
+ language: Required[Literal["python"]]
+ """Programming language for the code to execute.
+
+ Currently only supports Python, but more will be added.
+ """
+
+ files: Iterable[File]
+ """Files to upload to the session.
+
+ If present, files will be uploaded before executing the given code.
+ """
+
+ session_id: str
+ """Identifier of the current session.
+
+ Used to make follow-up calls. Requests will return an error if the session does
+ not belong to the caller or has expired.
+ """
+
+
+class File(TypedDict, total=False):
+ content: Required[str]
+
+ encoding: Required[Literal["string", "base64"]]
+ """Encoding of the file content.
+
+ Use `string` for text files such as code, and `base64` for binary files, such as
+ images.
+ """
+
+ name: Required[str]
diff --git a/src/together/types/endpoint_create_params.py b/src/together/types/endpoint_create_params.py
new file mode 100644
index 00000000..0e41f9cd
--- /dev/null
+++ b/src/together/types/endpoint_create_params.py
@@ -0,0 +1,46 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EndpointCreateParams", "Autoscaling"]
+
+
+class EndpointCreateParams(TypedDict, total=False):
+ autoscaling: Required[Autoscaling]
+ """Configuration for automatic scaling of the endpoint"""
+
+ hardware: Required[str]
+ """The hardware configuration to use for this endpoint"""
+
+ model: Required[str]
+ """The model to deploy on this endpoint"""
+
+ disable_prompt_cache: bool
+ """Whether to disable the prompt cache for this endpoint"""
+
+ disable_speculative_decoding: bool
+ """Whether to disable speculative decoding for this endpoint"""
+
+ display_name: str
+ """A human-readable name for the endpoint"""
+
+ inactive_timeout: Optional[int]
+ """
+ The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
+ timeout.
+ """
+
+ state: Literal["STARTED", "STOPPED"]
+ """The desired state of the endpoint"""
+
+
+class Autoscaling(TypedDict, total=False):
+ max_replicas: Required[int]
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: Required[int]
+ """The minimum number of replicas to maintain, even when there is no load"""
diff --git a/src/together/types/endpoint_create_response.py b/src/together/types/endpoint_create_response.py
new file mode 100644
index 00000000..2421d2f3
--- /dev/null
+++ b/src/together/types/endpoint_create_response.py
@@ -0,0 +1,51 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointCreateResponse", "Autoscaling"]
+
+
+class Autoscaling(BaseModel):
+ max_replicas: int
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: int
+ """The minimum number of replicas to maintain, even when there is no load"""
+
+
+class EndpointCreateResponse(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ autoscaling: Autoscaling
+ """Configuration for automatic scaling of the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ display_name: str
+ """Human-readable name for the endpoint"""
+
+ hardware: str
+ """The hardware configuration used for this endpoint"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["dedicated"]
+ """The type of endpoint"""
diff --git a/src/together/types/endpoint_list_params.py b/src/together/types/endpoint_list_params.py
new file mode 100644
index 00000000..5123d49d
--- /dev/null
+++ b/src/together/types/endpoint_list_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["EndpointListParams"]
+
+
+class EndpointListParams(TypedDict, total=False):
+ type: Literal["dedicated", "serverless"]
+ """Filter endpoints by type"""
diff --git a/src/together/types/endpoint_list_response.py b/src/together/types/endpoint_list_response.py
new file mode 100644
index 00000000..009764a7
--- /dev/null
+++ b/src/together/types/endpoint_list_response.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointListResponse", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["serverless", "dedicated"]
+ """The type of endpoint"""
+
+
+class EndpointListResponse(BaseModel):
+ data: List[Data]
+
+ object: Literal["list"]
diff --git a/src/together/types/endpoint_retrieve_response.py b/src/together/types/endpoint_retrieve_response.py
new file mode 100644
index 00000000..cb471adc
--- /dev/null
+++ b/src/together/types/endpoint_retrieve_response.py
@@ -0,0 +1,51 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointRetrieveResponse", "Autoscaling"]
+
+
+class Autoscaling(BaseModel):
+ max_replicas: int
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: int
+ """The minimum number of replicas to maintain, even when there is no load"""
+
+
+class EndpointRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ autoscaling: Autoscaling
+ """Configuration for automatic scaling of the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ display_name: str
+ """Human-readable name for the endpoint"""
+
+ hardware: str
+ """The hardware configuration used for this endpoint"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["dedicated"]
+ """The type of endpoint"""
diff --git a/src/together/types/endpoint_update_params.py b/src/together/types/endpoint_update_params.py
new file mode 100644
index 00000000..85ec7527
--- /dev/null
+++ b/src/together/types/endpoint_update_params.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EndpointUpdateParams", "Autoscaling"]
+
+
+class EndpointUpdateParams(TypedDict, total=False):
+ autoscaling: Autoscaling
+ """New autoscaling configuration for the endpoint"""
+
+ display_name: str
+ """A human-readable name for the endpoint"""
+
+ inactive_timeout: Optional[int]
+ """
+ The number of minutes of inactivity after which the endpoint will be
+ automatically stopped. Set to 0 to disable automatic timeout.
+ """
+
+ state: Literal["STARTED", "STOPPED"]
+ """The desired state of the endpoint"""
+
+
+class Autoscaling(TypedDict, total=False):
+ max_replicas: Required[int]
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: Required[int]
+ """The minimum number of replicas to maintain, even when there is no load"""
diff --git a/src/together/types/endpoint_update_response.py b/src/together/types/endpoint_update_response.py
new file mode 100644
index 00000000..24cb6487
--- /dev/null
+++ b/src/together/types/endpoint_update_response.py
@@ -0,0 +1,51 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointUpdateResponse", "Autoscaling"]
+
+
+class Autoscaling(BaseModel):
+ max_replicas: int
+ """The maximum number of replicas to scale up to under load"""
+
+ min_replicas: int
+ """The minimum number of replicas to maintain, even when there is no load"""
+
+
+class EndpointUpdateResponse(BaseModel):
+ id: str
+ """Unique identifier for the endpoint"""
+
+ autoscaling: Autoscaling
+ """Configuration for automatic scaling of the endpoint"""
+
+ created_at: datetime
+ """Timestamp when the endpoint was created"""
+
+ display_name: str
+ """Human-readable name for the endpoint"""
+
+ hardware: str
+ """The hardware configuration used for this endpoint"""
+
+ model: str
+ """The model deployed on this endpoint"""
+
+ name: str
+ """System name for the endpoint"""
+
+ object: Literal["endpoint"]
+ """The type of object"""
+
+ owner: str
+ """The owner of this endpoint"""
+
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+ """Current state of the endpoint"""
+
+ type: Literal["dedicated"]
+ """The type of endpoint"""
diff --git a/src/together/types/execute_response.py b/src/together/types/execute_response.py
new file mode 100644
index 00000000..9234b72c
--- /dev/null
+++ b/src/together/types/execute_response.py
@@ -0,0 +1,105 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+
+__all__ = [
+ "ExecuteResponse",
+ "SuccessfulExecution",
+ "SuccessfulExecutionData",
+ "SuccessfulExecutionDataOutput",
+ "SuccessfulExecutionDataOutputStreamOutput",
+ "SuccessfulExecutionDataOutputError",
+ "SuccessfulExecutionDataOutputDisplayorExecuteOutput",
+ "SuccessfulExecutionDataOutputDisplayorExecuteOutputData",
+ "FailedExecution",
+]
+
+
+class SuccessfulExecutionDataOutputStreamOutput(BaseModel):
+ data: str
+
+ type: Literal["stdout", "stderr"]
+
+
+class SuccessfulExecutionDataOutputError(BaseModel):
+ data: str
+
+ type: Literal["error"]
+
+
+class SuccessfulExecutionDataOutputDisplayorExecuteOutputData(BaseModel):
+ application_geo_json: Optional[Dict[str, object]] = FieldInfo(alias="application/geo+json", default=None)
+
+ application_javascript: Optional[str] = FieldInfo(alias="application/javascript", default=None)
+
+ application_json: Optional[Dict[str, object]] = FieldInfo(alias="application/json", default=None)
+
+ application_pdf: Optional[str] = FieldInfo(alias="application/pdf", default=None)
+
+ application_vnd_vega_v5_json: Optional[Dict[str, object]] = FieldInfo(
+ alias="application/vnd.vega.v5+json", default=None
+ )
+
+ application_vnd_vegalite_v4_json: Optional[Dict[str, object]] = FieldInfo(
+ alias="application/vnd.vegalite.v4+json", default=None
+ )
+
+ image_gif: Optional[str] = FieldInfo(alias="image/gif", default=None)
+
+ image_jpeg: Optional[str] = FieldInfo(alias="image/jpeg", default=None)
+
+ image_png: Optional[str] = FieldInfo(alias="image/png", default=None)
+
+ image_svg_xml: Optional[str] = FieldInfo(alias="image/svg+xml", default=None)
+
+ text_html: Optional[str] = FieldInfo(alias="text/html", default=None)
+
+ text_latex: Optional[str] = FieldInfo(alias="text/latex", default=None)
+
+ text_markdown: Optional[str] = FieldInfo(alias="text/markdown", default=None)
+
+ text_plain: Optional[str] = FieldInfo(alias="text/plain", default=None)
+
+
+class SuccessfulExecutionDataOutputDisplayorExecuteOutput(BaseModel):
+ data: SuccessfulExecutionDataOutputDisplayorExecuteOutputData
+
+ type: Literal["display_data", "execute_result"]
+
+
+SuccessfulExecutionDataOutput: TypeAlias = Annotated[
+ Union[
+ SuccessfulExecutionDataOutputStreamOutput,
+ SuccessfulExecutionDataOutputError,
+ SuccessfulExecutionDataOutputDisplayorExecuteOutput,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class SuccessfulExecutionData(BaseModel):
+ outputs: List[SuccessfulExecutionDataOutput]
+
+ session_id: str
+ """Identifier of the current session. Used to make follow-up calls."""
+
+
+class SuccessfulExecution(BaseModel):
+ data: SuccessfulExecutionData
+
+ errors: None = None
+
+
+class FailedExecution(BaseModel):
+ data: None = None
+
+ errors: List[Union[str, Dict[str, object]]]
+
+
+ExecuteResponse: TypeAlias = Union[SuccessfulExecution, FailedExecution]
diff --git a/src/together/types/fine_tune.py b/src/together/types/fine_tune.py
index ca97fe66..bf873be0 100644
--- a/src/together/types/fine_tune.py
+++ b/src/together/types/fine_tune.py
@@ -12,6 +12,11 @@
"Event",
"LrScheduler",
"LrSchedulerLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs",
+ "TrainingMethod",
+ "TrainingMethodTrainingMethodSft",
+ "TrainingMethodTrainingMethodDpo",
"TrainingType",
"TrainingTypeFullTrainingType",
"TrainingTypeLoRaTrainingType",
@@ -74,17 +79,43 @@ class Event(BaseModel):
level: Optional[Literal["info", "warning", "error", "legacy_info", "legacy_iwarning", "legacy_ierror"]] = None
-class LrSchedulerLrSchedulerArgs(BaseModel):
+class LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs(BaseModel):
min_lr_ratio: Optional[float] = None
"""The ratio of the final learning rate to the peak learning rate"""
+class LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs(BaseModel):
+ min_lr_ratio: Optional[float] = None
+ """The ratio of the final learning rate to the peak learning rate"""
+
+ num_cycles: Optional[float] = None
+ """Number or fraction of cycles for the cosine learning rate scheduler"""
+
+
+LrSchedulerLrSchedulerArgs: TypeAlias = Union[
+ LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs, LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs
+]
+
+
class LrScheduler(BaseModel):
- lr_scheduler_type: str
+ lr_scheduler_type: Literal["linear", "cosine"]
lr_scheduler_args: Optional[LrSchedulerLrSchedulerArgs] = None
+class TrainingMethodTrainingMethodSft(BaseModel):
+ method: Literal["sft"]
+
+
+class TrainingMethodTrainingMethodDpo(BaseModel):
+ method: Literal["dpo"]
+
+ dpo_beta: Optional[float] = None
+
+
+TrainingMethod: TypeAlias = Union[TrainingMethodTrainingMethodSft, TrainingMethodTrainingMethodDpo]
+
+
class TrainingTypeFullTrainingType(BaseModel):
type: Literal["Full"]
@@ -119,7 +150,7 @@ class FineTune(BaseModel):
"completed",
]
- batch_size: Optional[int] = None
+ batch_size: Union[int, Literal["max"], None] = None
created_at: Optional[str] = None
@@ -129,6 +160,8 @@ class FineTune(BaseModel):
events: Optional[List[Event]] = None
+ from_checkpoint: Optional[str] = None
+
job_id: Optional[str] = None
learning_rate: Optional[float] = None
@@ -161,6 +194,8 @@ class FineTune(BaseModel):
training_file: Optional[str] = None
+ training_method: Optional[TrainingMethod] = None
+
training_type: Optional[TrainingType] = None
trainingfile_numlines: Optional[int] = None
diff --git a/src/together/types/fine_tune_create_params.py b/src/together/types/fine_tune_create_params.py
index 1ace4f8d..3291e5a5 100644
--- a/src/together/types/fine_tune_create_params.py
+++ b/src/together/types/fine_tune_create_params.py
@@ -9,6 +9,11 @@
"FineTuneCreateParams",
"LrScheduler",
"LrSchedulerLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs",
+ "LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs",
+ "TrainingMethod",
+ "TrainingMethodTrainingMethodSft",
+ "TrainingMethodTrainingMethodDpo",
"TrainingType",
"TrainingTypeFullTrainingType",
"TrainingTypeLoRaTrainingType",
@@ -22,10 +27,19 @@ class FineTuneCreateParams(TypedDict, total=False):
training_file: Required[str]
"""File-ID of a training file uploaded to the Together API"""
- batch_size: int
+ batch_size: Union[int, Literal["max"]]
"""
Number of training examples processed together (larger batches use more memory
- but may train faster)
+ but may train faster). Defaults to "max". We use training optimizations like
+ packing, so the effective batch size may be different than the value you set.
+ """
+
+ from_checkpoint: str
+ """The checkpoint identifier to continue training from a previous fine-tuning job.
+
+ Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or
+ `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the
+ final checkpoint will be used.
"""
learning_rate: float
@@ -35,6 +49,10 @@ class FineTuneCreateParams(TypedDict, total=False):
"""
lr_scheduler: LrScheduler
+ """The learning rate scheduler to use.
+
+ It specifies how the learning rate is adjusted during training.
+ """
max_grad_norm: float
"""Max gradient norm to be used for gradient clipping. Set to 0 to disable."""
@@ -60,6 +78,12 @@ class FineTuneCreateParams(TypedDict, total=False):
instruction data.
"""
+ training_method: TrainingMethod
+ """The training method to use.
+
+ 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
+ """
+
training_type: TrainingType
validation_file: str
@@ -87,20 +111,46 @@ class FineTuneCreateParams(TypedDict, total=False):
"""
weight_decay: float
- """Weight decay"""
+ """Weight decay. Regularization parameter for the optimizer."""
+
+
+class LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs(TypedDict, total=False):
+ min_lr_ratio: float
+ """The ratio of the final learning rate to the peak learning rate"""
-class LrSchedulerLrSchedulerArgs(TypedDict, total=False):
+class LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs(TypedDict, total=False):
min_lr_ratio: float
"""The ratio of the final learning rate to the peak learning rate"""
+ num_cycles: float
+ """Number or fraction of cycles for the cosine learning rate scheduler"""
+
+
+LrSchedulerLrSchedulerArgs: TypeAlias = Union[
+ LrSchedulerLrSchedulerArgsLinearLrSchedulerArgs, LrSchedulerLrSchedulerArgsCosineLrSchedulerArgs
+]
+
class LrScheduler(TypedDict, total=False):
- lr_scheduler_type: Required[str]
+ lr_scheduler_type: Required[Literal["linear", "cosine"]]
lr_scheduler_args: LrSchedulerLrSchedulerArgs
+class TrainingMethodTrainingMethodSft(TypedDict, total=False):
+ method: Required[Literal["sft"]]
+
+
+class TrainingMethodTrainingMethodDpo(TypedDict, total=False):
+ method: Required[Literal["dpo"]]
+
+ dpo_beta: float
+
+
+TrainingMethod: TypeAlias = Union[TrainingMethodTrainingMethodSft, TrainingMethodTrainingMethodDpo]
+
+
class TrainingTypeFullTrainingType(TypedDict, total=False):
type: Required[Literal["Full"]]
diff --git a/src/together/types/fine_tune_download_response.py b/src/together/types/fine_tune_download_response.py
index 055c235e..a5f5953b 100644
--- a/src/together/types/fine_tune_download_response.py
+++ b/src/together/types/fine_tune_download_response.py
@@ -1,7 +1,7 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-import builtins
from typing import Optional
+from typing_extensions import Literal
from .._models import BaseModel
@@ -15,6 +15,6 @@ class FineTuneDownloadResponse(BaseModel):
filename: Optional[str] = None
- object: Optional[builtins.object] = None
+ object: Optional[Literal["local"]] = None
size: Optional[int] = None
diff --git a/src/together/types/hardware_list_params.py b/src/together/types/hardware_list_params.py
new file mode 100644
index 00000000..6506655e
--- /dev/null
+++ b/src/together/types/hardware_list_params.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["HardwareListParams"]
+
+
+class HardwareListParams(TypedDict, total=False):
+ model: str
+ """Filter hardware configurations by model compatibility.
+
+ When provided, the response includes availability status for each compatible
+ configuration.
+ """
diff --git a/src/together/types/hardware_list_response.py b/src/together/types/hardware_list_response.py
new file mode 100644
index 00000000..43481726
--- /dev/null
+++ b/src/together/types/hardware_list_response.py
@@ -0,0 +1,58 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["HardwareListResponse", "Data", "DataPricing", "DataSpecs", "DataAvailability"]
+
+
+class DataPricing(BaseModel):
+ cents_per_minute: float
+ """Cost per minute of endpoint uptime in cents"""
+
+
+class DataSpecs(BaseModel):
+ gpu_count: int
+ """Number of GPUs in this configuration"""
+
+ gpu_link: str
+ """The GPU interconnect technology"""
+
+ gpu_memory: float
+ """Amount of GPU memory in GB"""
+
+ gpu_type: str
+ """The type/model of GPU"""
+
+
+class DataAvailability(BaseModel):
+ status: Literal["available", "unavailable", "insufficient"]
+ """The availability status of the hardware configuration"""
+
+
+class Data(BaseModel):
+ id: str
+ """Unique identifier for the hardware configuration"""
+
+ object: Literal["hardware"]
+
+ pricing: DataPricing
+ """Pricing details for using an endpoint"""
+
+ specs: DataSpecs
+ """Detailed specifications of a hardware configuration"""
+
+ updated_at: datetime
+ """Timestamp of when the hardware status was last updated"""
+
+ availability: Optional[DataAvailability] = None
+ """Indicates the current availability status of a hardware configuration"""
+
+
+class HardwareListResponse(BaseModel):
+ data: List[Data]
+
+ object: Literal["list"]
diff --git a/src/together/types/job_list_response.py b/src/together/types/job_list_response.py
new file mode 100644
index 00000000..11281d23
--- /dev/null
+++ b/src/together/types/job_list_response.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["JobListResponse", "Data", "DataArgs", "DataStatusUpdate"]
+
+
+class DataArgs(BaseModel):
+ description: Optional[str] = None
+
+ api_model_name: Optional[str] = FieldInfo(alias="modelName", default=None)
+
+ api_model_source: Optional[str] = FieldInfo(alias="modelSource", default=None)
+
+
+class DataStatusUpdate(BaseModel):
+ message: str
+
+ status: str
+
+ timestamp: datetime
+
+
+class Data(BaseModel):
+ args: DataArgs
+
+ created_at: datetime
+
+ job_id: str
+
+ status: Literal["Queued", "Running", "Complete", "Failed"]
+
+ status_updates: List[DataStatusUpdate]
+
+ type: str
+
+ updated_at: datetime
+
+
+class JobListResponse(BaseModel):
+ data: List[Data]
diff --git a/src/together/types/job_retrieve_response.py b/src/together/types/job_retrieve_response.py
new file mode 100644
index 00000000..ded83144
--- /dev/null
+++ b/src/together/types/job_retrieve_response.py
@@ -0,0 +1,43 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["JobRetrieveResponse", "Args", "StatusUpdate"]
+
+
+class Args(BaseModel):
+ description: Optional[str] = None
+
+ api_model_name: Optional[str] = FieldInfo(alias="modelName", default=None)
+
+ api_model_source: Optional[str] = FieldInfo(alias="modelSource", default=None)
+
+
+class StatusUpdate(BaseModel):
+ message: str
+
+ status: str
+
+ timestamp: datetime
+
+
+class JobRetrieveResponse(BaseModel):
+ args: Args
+
+ created_at: datetime
+
+ job_id: str
+
+ status: Literal["Queued", "Running", "Complete", "Failed"]
+
+ status_updates: List[StatusUpdate]
+
+ type: str
+
+ updated_at: datetime
diff --git a/src/together/types/model_upload_params.py b/src/together/types/model_upload_params.py
new file mode 100644
index 00000000..9a159405
--- /dev/null
+++ b/src/together/types/model_upload_params.py
@@ -0,0 +1,36 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["ModelUploadParams"]
+
+
+class ModelUploadParams(TypedDict, total=False):
+ model_name: Required[str]
+ """The name to give to your uploaded model"""
+
+ model_source: Required[str]
+ """The source location of the model (Hugging Face repo or S3 path)"""
+
+ base_model: str
+ """
+ The base model to use for an adapter if setting it to run against a serverless
+ pool. Only used for model_type `adapter`.
+ """
+
+ description: str
+ """A description of your model"""
+
+ hf_token: str
+ """Hugging Face token (if uploading from Hugging Face)"""
+
+ lora_model: str
+ """
+ The lora pool to use for an adapter if setting it to run against, say, a
+ dedicated pool. Only used for model_type `adapter`.
+ """
+
+ model_type: Literal["model", "adapter"]
+ """Whether the model is a full model or an adapter"""
diff --git a/src/together/types/model_upload_response.py b/src/together/types/model_upload_response.py
new file mode 100644
index 00000000..9b8d9237
--- /dev/null
+++ b/src/together/types/model_upload_response.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["ModelUploadResponse", "Data"]
+
+
+class Data(BaseModel):
+ job_id: str
+
+ api_model_id: str = FieldInfo(alias="model_id")
+
+ api_model_name: str = FieldInfo(alias="model_name")
+
+ api_model_source: str = FieldInfo(alias="model_source")
+
+
+class ModelUploadResponse(BaseModel):
+ data: Data
+
+ message: str
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 88553cec..207023a6 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -26,7 +26,7 @@ def test_method_create_overload_1(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -39,7 +39,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
context_length_exceeded_behavior="truncate",
echo=True,
frequency_penalty=0,
@@ -55,7 +55,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -88,7 +88,7 @@ def test_raw_response_create_overload_1(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.is_closed is True
@@ -105,7 +105,7 @@ def test_streaming_response_create_overload_1(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -124,7 +124,7 @@ def test_method_create_overload_2(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
completion_stream.response.close()
@@ -138,7 +138,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
context_length_exceeded_behavior="truncate",
echo=True,
@@ -155,7 +155,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -187,7 +187,7 @@ def test_raw_response_create_overload_2(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
@@ -204,7 +204,7 @@ def test_streaming_response_create_overload_2(self, client: Together) -> None:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
) as response:
assert not response.is_closed
@@ -228,7 +228,7 @@ async def test_method_create_overload_1(self, async_client: AsyncTogether) -> No
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert_matches_type(ChatCompletion, completion, path=["response"])
@@ -241,7 +241,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
context_length_exceeded_behavior="truncate",
echo=True,
frequency_penalty=0,
@@ -257,7 +257,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -290,7 +290,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncTogether)
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.is_closed is True
@@ -307,7 +307,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncTog
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -326,7 +326,7 @@ async def test_method_create_overload_2(self, async_client: AsyncTogether) -> No
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
await completion_stream.response.aclose()
@@ -340,7 +340,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
context_length_exceeded_behavior="truncate",
echo=True,
@@ -357,7 +357,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
presence_penalty=0,
repetition_penalty=0,
response_format={
- "schema": {"foo": "string"},
+ "schema": {"foo": "bar"},
"type": "json",
},
safety_model="safety_model_name",
@@ -389,7 +389,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncTogether)
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
)
@@ -406,7 +406,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncTog
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
stream=True,
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/code_interpreter/__init__.py b/tests/api_resources/code_interpreter/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/code_interpreter/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/code_interpreter/test_sessions.py b/tests/api_resources/code_interpreter/test_sessions.py
new file mode 100644
index 00000000..e53d7a4a
--- /dev/null
+++ b/tests/api_resources/code_interpreter/test_sessions.py
@@ -0,0 +1,90 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types.code_interpreter import SessionListResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestSessions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ session = client.code_interpreter.sessions.list()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.code_interpreter.sessions.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.code_interpreter.sessions.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncSessions:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ session = await async_client.code_interpreter.sessions.list()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.code_interpreter.sessions.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = await response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.code_interpreter.sessions.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_code_interpreter.py b/tests/api_resources/test_code_interpreter.py
new file mode 100644
index 00000000..17c1928c
--- /dev/null
+++ b/tests/api_resources/test_code_interpreter.py
@@ -0,0 +1,146 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import ExecuteResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestCodeInterpreter:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_method_execute(self, client: Together) -> None:
+ code_interpreter = client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_method_execute_with_all_params(self, client: Together) -> None:
+ code_interpreter = client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ files=[
+ {
+ "content": "content",
+ "encoding": "string",
+ "name": "name",
+ }
+ ],
+ session_id="ses_abcDEF123",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_raw_response_execute(self, client: Together) -> None:
+ response = client.code_interpreter.with_raw_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ code_interpreter = response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ def test_streaming_response_execute(self, client: Together) -> None:
+ with client.code_interpreter.with_streaming_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ code_interpreter = response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncCodeInterpreter:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_method_execute(self, async_client: AsyncTogether) -> None:
+ code_interpreter = await async_client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_method_execute_with_all_params(self, async_client: AsyncTogether) -> None:
+ code_interpreter = await async_client.code_interpreter.execute(
+ code="print('Hello, world!')",
+ language="python",
+ files=[
+ {
+ "content": "content",
+ "encoding": "string",
+ "name": "name",
+ }
+ ],
+ session_id="ses_abcDEF123",
+ )
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_raw_response_execute(self, async_client: AsyncTogether) -> None:
+ response = await async_client.code_interpreter.with_raw_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ code_interpreter = await response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ @pytest.mark.skip(
+ reason="currently no good way to test endpoints defining callbacks, Prism mock server will fail trying to reach the provided callback url"
+ )
+ @parametrize
+ async def test_streaming_response_execute(self, async_client: AsyncTogether) -> None:
+ async with async_client.code_interpreter.with_streaming_response.execute(
+ code="print('Hello, world!')",
+ language="python",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ code_interpreter = await response.parse()
+ assert_matches_type(ExecuteResponse, code_interpreter, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py
index a81e24c5..084ad480 100644
--- a/tests/api_resources/test_embeddings.py
+++ b/tests/api_resources/test_embeddings.py
@@ -21,7 +21,7 @@ class TestEmbeddings:
def test_method_create(self, client: Together) -> None:
embedding = client.embeddings.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert_matches_type(Embedding, embedding, path=["response"])
@@ -29,7 +29,7 @@ def test_method_create(self, client: Together) -> None:
def test_raw_response_create(self, client: Together) -> None:
response = client.embeddings.with_raw_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert response.is_closed is True
@@ -41,7 +41,7 @@ def test_raw_response_create(self, client: Together) -> None:
def test_streaming_response_create(self, client: Together) -> None:
with client.embeddings.with_streaming_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -59,7 +59,7 @@ class TestAsyncEmbeddings:
async def test_method_create(self, async_client: AsyncTogether) -> None:
embedding = await async_client.embeddings.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert_matches_type(Embedding, embedding, path=["response"])
@@ -67,7 +67,7 @@ async def test_method_create(self, async_client: AsyncTogether) -> None:
async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
response = await async_client.embeddings.with_raw_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
)
assert response.is_closed is True
@@ -79,7 +79,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
async with async_client.embeddings.with_streaming_response.create(
input="Our solar system orbits the Milky Way galaxy at about 515,000 mph",
- model="WhereIsAI/UAE-Large-V1",
+ model="togethercomputer/m2-bert-80M-8k-retrieval",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
diff --git a/tests/api_resources/test_endpoints.py b/tests/api_resources/test_endpoints.py
new file mode 100644
index 00000000..59cbc6ab
--- /dev/null
+++ b/tests/api_resources/test_endpoints.py
@@ -0,0 +1,473 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import (
+ EndpointListResponse,
+ EndpointCreateResponse,
+ EndpointUpdateResponse,
+ EndpointRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestEndpoints:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: Together) -> None:
+ endpoint = client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: Together) -> None:
+ endpoint = client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ disable_prompt_cache=True,
+ disable_speculative_decoding=True,
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: Together) -> None:
+ endpoint = client.endpoints.retrieve(
+ "endpointId",
+ )
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.retrieve(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.retrieve(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ client.endpoints.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_update(self, client: Together) -> None:
+ endpoint = client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_method_update_with_all_params(self, client: Together) -> None:
+ endpoint = client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_update(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_update(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_update(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ client.endpoints.with_raw_response.update(
+ endpoint_id="",
+ )
+
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ endpoint = client.endpoints.list()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: Together) -> None:
+ endpoint = client.endpoints.list(
+ type="dedicated",
+ )
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_delete(self, client: Together) -> None:
+ endpoint = client.endpoints.delete(
+ "endpointId",
+ )
+ assert endpoint is None
+
+ @parametrize
+ def test_raw_response_delete(self, client: Together) -> None:
+ response = client.endpoints.with_raw_response.delete(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = response.parse()
+ assert endpoint is None
+
+ @parametrize
+ def test_streaming_response_delete(self, client: Together) -> None:
+ with client.endpoints.with_streaming_response.delete(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = response.parse()
+ assert endpoint is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ client.endpoints.with_raw_response.delete(
+ "",
+ )
+
+
+class TestAsyncEndpoints:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ disable_prompt_cache=True,
+ disable_speculative_decoding=True,
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.create(
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ hardware="1x_nvidia_a100_80gb_sxm",
+ model="meta-llama/Llama-3-8b-chat-hf",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointCreateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.retrieve(
+ "endpointId",
+ )
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.retrieve(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.retrieve(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointRetrieveResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ await async_client.endpoints.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_update(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_method_update_with_all_params(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ autoscaling={
+ "max_replicas": 5,
+ "min_replicas": 2,
+ },
+ display_name="My Llama3 70b endpoint",
+ inactive_timeout=60,
+ state="STARTED",
+ )
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_update(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_update(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.update(
+ endpoint_id="endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointUpdateResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_update(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ await async_client.endpoints.with_raw_response.update(
+ endpoint_id="",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.list()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.list(
+ type="dedicated",
+ )
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert_matches_type(EndpointListResponse, endpoint, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncTogether) -> None:
+ endpoint = await async_client.endpoints.delete(
+ "endpointId",
+ )
+ assert endpoint is None
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncTogether) -> None:
+ response = await async_client.endpoints.with_raw_response.delete(
+ "endpointId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ endpoint = await response.parse()
+ assert endpoint is None
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncTogether) -> None:
+ async with async_client.endpoints.with_streaming_response.delete(
+ "endpointId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ endpoint = await response.parse()
+ assert endpoint is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `endpoint_id` but received ''"):
+ await async_client.endpoints.with_raw_response.delete(
+ "",
+ )
diff --git a/tests/api_resources/test_fine_tune.py b/tests/api_resources/test_fine_tune.py
index 8a4eda52..4545089a 100644
--- a/tests/api_resources/test_fine_tune.py
+++ b/tests/api_resources/test_fine_tune.py
@@ -22,6 +22,7 @@
class TestFineTune:
parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_method_create(self, client: Together) -> None:
fine_tune = client.fine_tune.create(
@@ -30,15 +31,17 @@ def test_method_create(self, client: Together) -> None:
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_method_create_with_all_params(self, client: Together) -> None:
fine_tune = client.fine_tune.create(
model="model",
training_file="training_file",
batch_size=0,
+ from_checkpoint="from_checkpoint",
learning_rate=0,
lr_scheduler={
- "lr_scheduler_type": "lr_scheduler_type",
+ "lr_scheduler_type": "linear",
"lr_scheduler_args": {"min_lr_ratio": 0},
},
max_grad_norm=0,
@@ -47,6 +50,7 @@ def test_method_create_with_all_params(self, client: Together) -> None:
n_evals=0,
suffix="suffix",
train_on_inputs=True,
+ training_method={"method": "sft"},
training_type={"type": "Full"},
validation_file="validation_file",
wandb_api_key="wandb_api_key",
@@ -58,6 +62,7 @@ def test_method_create_with_all_params(self, client: Together) -> None:
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_raw_response_create(self, client: Together) -> None:
response = client.fine_tune.with_raw_response.create(
@@ -70,6 +75,7 @@ def test_raw_response_create(self, client: Together) -> None:
fine_tune = response.parse()
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
def test_streaming_response_create(self, client: Together) -> None:
with client.fine_tune.with_streaming_response.create(
@@ -268,6 +274,7 @@ def test_path_params_list_events(self, client: Together) -> None:
class TestAsyncFineTune:
parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_method_create(self, async_client: AsyncTogether) -> None:
fine_tune = await async_client.fine_tune.create(
@@ -276,15 +283,17 @@ async def test_method_create(self, async_client: AsyncTogether) -> None:
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None:
fine_tune = await async_client.fine_tune.create(
model="model",
training_file="training_file",
batch_size=0,
+ from_checkpoint="from_checkpoint",
learning_rate=0,
lr_scheduler={
- "lr_scheduler_type": "lr_scheduler_type",
+ "lr_scheduler_type": "linear",
"lr_scheduler_args": {"min_lr_ratio": 0},
},
max_grad_norm=0,
@@ -293,6 +302,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
n_evals=0,
suffix="suffix",
train_on_inputs=True,
+ training_method={"method": "sft"},
training_type={"type": "Full"},
validation_file="validation_file",
wandb_api_key="wandb_api_key",
@@ -304,6 +314,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
)
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
response = await async_client.fine_tune.with_raw_response.create(
@@ -316,6 +327,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
fine_tune = await response.parse()
assert_matches_type(FineTune, fine_tune, path=["response"])
+ @pytest.mark.skip(reason="invalid oneOf in required props")
@parametrize
async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
async with async_client.fine_tune.with_streaming_response.create(
diff --git a/tests/api_resources/test_hardware.py b/tests/api_resources/test_hardware.py
new file mode 100644
index 00000000..aafe18f0
--- /dev/null
+++ b/tests/api_resources/test_hardware.py
@@ -0,0 +1,86 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import HardwareListResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestHardware:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ hardware = client.hardware.list()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: Together) -> None:
+ hardware = client.hardware.list(
+ model="model",
+ )
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.hardware.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ hardware = response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.hardware.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ hardware = response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncHardware:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ hardware = await async_client.hardware.list()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncTogether) -> None:
+ hardware = await async_client.hardware.list(
+ model="model",
+ )
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.hardware.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ hardware = await response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.hardware.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ hardware = await response.parse()
+ assert_matches_type(HardwareListResponse, hardware, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py
index 2d1dda01..0e84fb5f 100644
--- a/tests/api_resources/test_images.py
+++ b/tests/api_resources/test_images.py
@@ -21,7 +21,7 @@ class TestImages:
@parametrize
def test_method_create(self, client: Together) -> None:
image = client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
assert_matches_type(ImageFile, image, path=["response"])
@@ -30,7 +30,7 @@ def test_method_create(self, client: Together) -> None:
@parametrize
def test_method_create_with_all_params(self, client: Together) -> None:
image = client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
guidance=0,
height=0,
@@ -55,7 +55,7 @@ def test_method_create_with_all_params(self, client: Together) -> None:
@parametrize
def test_raw_response_create(self, client: Together) -> None:
response = client.images.with_raw_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
@@ -68,7 +68,7 @@ def test_raw_response_create(self, client: Together) -> None:
@parametrize
def test_streaming_response_create(self, client: Together) -> None:
with client.images.with_streaming_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
) as response:
assert not response.is_closed
@@ -87,7 +87,7 @@ class TestAsyncImages:
@parametrize
async def test_method_create(self, async_client: AsyncTogether) -> None:
image = await async_client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
assert_matches_type(ImageFile, image, path=["response"])
@@ -96,7 +96,7 @@ async def test_method_create(self, async_client: AsyncTogether) -> None:
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncTogether) -> None:
image = await async_client.images.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
guidance=0,
height=0,
@@ -121,7 +121,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
@parametrize
async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
response = await async_client.images.with_raw_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
)
@@ -134,7 +134,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None:
@parametrize
async def test_streaming_response_create(self, async_client: AsyncTogether) -> None:
async with async_client.images.with_streaming_response.create(
- model="black-forest-labs/FLUX.1-schnell-Free",
+ model="black-forest-labs/FLUX.1-schnell",
prompt="cat floating in space, cinematic",
) as response:
assert not response.is_closed
diff --git a/tests/api_resources/test_jobs.py b/tests/api_resources/test_jobs.py
new file mode 100644
index 00000000..110600d7
--- /dev/null
+++ b/tests/api_resources/test_jobs.py
@@ -0,0 +1,148 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from together import Together, AsyncTogether
+from tests.utils import assert_matches_type
+from together.types import JobListResponse, JobRetrieveResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestJobs:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: Together) -> None:
+ job = client.jobs.retrieve(
+ "jobId",
+ )
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: Together) -> None:
+ response = client.jobs.with_raw_response.retrieve(
+ "jobId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: Together) -> None:
+ with client.jobs.with_streaming_response.retrieve(
+ "jobId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: Together) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ client.jobs.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: Together) -> None:
+ job = client.jobs.list()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: Together) -> None:
+ response = client.jobs.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: Together) -> None:
+ with client.jobs.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncJobs:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncTogether) -> None:
+ job = await async_client.jobs.retrieve(
+ "jobId",
+ )
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncTogether) -> None:
+ response = await async_client.jobs.with_raw_response.retrieve(
+ "jobId",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncTogether) -> None:
+ async with async_client.jobs.with_streaming_response.retrieve(
+ "jobId",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(JobRetrieveResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncTogether) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ await async_client.jobs.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncTogether) -> None:
+ job = await async_client.jobs.list()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncTogether) -> None:
+ response = await async_client.jobs.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncTogether) -> None:
+ async with async_client.jobs.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index 4a0d63e8..fbf910a0 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -9,7 +9,7 @@
from together import Together, AsyncTogether
from tests.utils import assert_matches_type
-from together.types import ModelListResponse
+from together.types import ModelListResponse, ModelUploadResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -42,6 +42,53 @@ def test_streaming_response_list(self, client: Together) -> None:
assert cast(Any, response.is_closed) is True
+ @parametrize
+ def test_method_upload(self, client: Together) -> None:
+ model = client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ def test_method_upload_with_all_params(self, client: Together) -> None:
+ model = client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ base_model="Qwen/Qwen2.5-72B-Instruct",
+ description="Finetuned Qwen2.5-72B-Instruct by Unsloth",
+ hf_token="hf_examplehuggingfacetoken",
+ lora_model="my_username/Qwen2.5-72B-Instruct-lora",
+ model_type="model",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ def test_raw_response_upload(self, client: Together) -> None:
+ response = client.models.with_raw_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ def test_streaming_response_upload(self, client: Together) -> None:
+ with client.models.with_streaming_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
class TestAsyncModels:
parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
@@ -70,3 +117,50 @@ async def test_streaming_response_list(self, async_client: AsyncTogether) -> Non
assert_matches_type(ModelListResponse, model, path=["response"])
assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_upload(self, async_client: AsyncTogether) -> None:
+ model = await async_client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ async def test_method_upload_with_all_params(self, async_client: AsyncTogether) -> None:
+ model = await async_client.models.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ base_model="Qwen/Qwen2.5-72B-Instruct",
+ description="Finetuned Qwen2.5-72B-Instruct by Unsloth",
+ hf_token="hf_examplehuggingfacetoken",
+ lora_model="my_username/Qwen2.5-72B-Instruct-lora",
+ model_type="model",
+ )
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ async def test_raw_response_upload(self, async_client: AsyncTogether) -> None:
+ response = await async_client.models.with_raw_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ model = await response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_upload(self, async_client: AsyncTogether) -> None:
+ async with async_client.models.with_streaming_response.upload(
+ model_name="Qwen2.5-72B-Instruct",
+ model_source="unsloth/Qwen2.5-72B-Instruct",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ model = await response.parse()
+ assert_matches_type(ModelUploadResponse, model, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/conftest.py b/tests/conftest.py
index 2262b6e7..b7e86792 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
from together import Together, AsyncTogether
if TYPE_CHECKING:
- from _pytest.fixtures import FixtureRequest
+ from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage]
pytest.register_assert_rewrite("tests.utils")
diff --git a/tests/test_client.py b/tests/test_client.py
index 5daf9572..b8c41bee 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -814,7 +814,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.retries_taken == failures_before_success
@@ -846,7 +846,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": Omit()},
)
@@ -878,7 +878,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": "42"},
)
@@ -1657,7 +1657,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
)
assert response.retries_taken == failures_before_success
@@ -1690,7 +1690,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": Omit()},
)
@@ -1723,7 +1723,7 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
"role": "system",
}
],
- model="Qwen/Qwen2.5-72B-Instruct-Turbo",
+ model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
extra_headers={"x-stainless-retry-count": "42"},
)
@@ -1741,7 +1741,7 @@ def test_get_platform(self) -> None:
import threading
from together._utils import asyncify
- from together._base_client import get_platform
+ from together._base_client import get_platform
async def test_main() -> None:
result = await asyncify(get_platform)()
diff --git a/tests/test_models.py b/tests/test_models.py
index 6fbb96a3..da728846 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -492,12 +492,15 @@ class Model(BaseModel):
resource_id: Optional[str] = None
m = Model.construct()
+ assert m.resource_id is None
assert "resource_id" not in m.model_fields_set
m = Model.construct(resource_id=None)
+ assert m.resource_id is None
assert "resource_id" in m.model_fields_set
m = Model.construct(resource_id="foo")
+ assert m.resource_id == "foo"
assert "resource_id" in m.model_fields_set
@@ -832,7 +835,7 @@ class B(BaseModel):
@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1")
def test_type_alias_type() -> None:
- Alias = TypeAliasType("Alias", str)
+ Alias = TypeAliasType("Alias", str) # pyright: ignore
class Model(BaseModel):
alias: Alias
@@ -854,3 +857,35 @@ class Model(BaseModel):
m = construct_type(value={"cls": "foo"}, type_=Model)
assert isinstance(m, Model)
assert isinstance(m.cls, str)
+
+
+def test_discriminated_union_case() -> None:
+ class A(BaseModel):
+ type: Literal["a"]
+
+ data: bool
+
+ class B(BaseModel):
+ type: Literal["b"]
+
+ data: List[Union[A, object]]
+
+ class ModelA(BaseModel):
+ type: Literal["modelA"]
+
+ data: int
+
+ class ModelB(BaseModel):
+ type: Literal["modelB"]
+
+ required: str
+
+ data: Union[A, B]
+
+ # when constructing ModelA | ModelB, value data doesn't match ModelB exactly - missing `required`
+ m = construct_type(
+ value={"type": "modelB", "data": {"type": "a", "data": True}},
+ type_=cast(Any, Annotated[Union[ModelA, ModelB], PropertyInfo(discriminator="type")]),
+ )
+
+ assert isinstance(m, ModelB)
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 3dda603b..2a34b4f7 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -8,7 +8,7 @@
import pytest
-from together._types import Base64FileInput
+from together._types import NOT_GIVEN, Base64FileInput
from together._utils import (
PropertyInfo,
transform as _transform,
@@ -432,3 +432,22 @@ async def test_base64_file_input(use_async: bool) -> None:
assert await transform({"foo": io.BytesIO(b"Hello, world!")}, TypedDictBase64Input, use_async) == {
"foo": "SGVsbG8sIHdvcmxkIQ=="
} # type: ignore[comparison-overlap]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_transform_skipping(use_async: bool) -> None:
+ # lists of ints are left as-is
+ data = [1, 2, 3]
+ assert await transform(data, List[int], use_async) is data
+
+ # iterables of ints are converted to a list
+ data = iter([1, 2, 3])
+ assert await transform(data, Iterable[int], use_async) == [1, 2, 3]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_strips_notgiven(use_async: bool) -> None:
+ assert await transform({"foo_bar": "bar"}, Foo1, use_async) == {"fooBar": "bar"}
+ assert await transform({"foo_bar": NOT_GIVEN}, Foo1, use_async) == {}
diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py
index 75a517dd..7c25ecc7 100644
--- a/tests/test_utils/test_proxy.py
+++ b/tests/test_utils/test_proxy.py
@@ -21,3 +21,14 @@ def test_recursive_proxy() -> None:
assert dir(proxy) == []
assert type(proxy).__name__ == "RecursiveLazyProxy"
assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy"
+
+
+def test_isinstance_does_not_error() -> None:
+ class AlwaysErrorProxy(LazyProxy[Any]):
+ @override
+ def __load__(self) -> Any:
+ raise RuntimeError("Mocking missing dependency")
+
+ proxy = AlwaysErrorProxy()
+ assert not isinstance(proxy, dict)
+ assert isinstance(proxy, LazyProxy)