From 9c9e67276776b7169bd2e9066c6049f5237ed044 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 13 Jun 2024 23:19:50 +0000 Subject: [PATCH 01/10] chore: go live (#1) --- .github/workflows/publish-pypi.yml | 31 +++++++++++++ .github/workflows/release-doctor.yml | 19 ++++++++ .release-please-manifest.json | 3 ++ CONTRIBUTING.md | 4 +- README.md | 13 +++--- bin/check-release-environment | 32 ++++++++++++++ pyproject.toml | 6 +-- release-please-config.json | 66 ++++++++++++++++++++++++++++ src/together/_version.py | 2 +- 9 files changed, 162 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/publish-pypi.yml create mode 100644 .github/workflows/release-doctor.yml create mode 100644 .release-please-manifest.json create mode 100644 bin/check-release-environment create mode 100644 release-please-config.json diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 00000000..632c0e94 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,31 @@ +# This workflow is triggered when a GitHub release is created. +# It can also be run manually to re-publish to PyPI in case it failed for some reason. +# You can run this workflow by navigating to https://www.github.com/togethercomputer/together-py/actions/workflows/publish-pypi.yml +name: Publish PyPI +on: + workflow_dispatch: + + release: + types: [published] + +jobs: + publish: + name: publish + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: 0.24.0 + RYE_INSTALL_OPTION: "--yes" + + - name: Publish to PyPI + run: | + bash ./bin/publish-pypi + env: + PYPI_TOKEN: ${{ secrets.TOGETHER_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml new file mode 100644 index 00000000..5e4a97e0 --- /dev/null +++ b/.github/workflows/release-doctor.yml @@ -0,0 +1,19 @@ +name: Release Doctor +on: + pull_request: + workflow_dispatch: + +jobs: + release_doctor: + name: release doctor + runs-on: ubuntu-latest + if: github.repository == 'togethercomputer/together-py' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') + + steps: + - uses: actions/checkout@v4 + + - name: Check release environment + run: | + bash ./bin/check-release-environment + env: + PYPI_TOKEN: ${{ secrets.TOGETHER_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 00000000..c4762802 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "0.0.1-alpha.0" +} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 412eb3a2..36b2db47 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -59,7 +59,7 @@ If you’d like to use the repository from source, you can either install from g To install via git: ```bash -pip install git+ssh://git@github.com/stainless-sdks/TogetherAI-python.git +pip install git+ssh://git@github.com/togethercomputer/together-py.git ``` Alternatively, you can build from source and install the wheel file: @@ -117,7 +117,7 @@ the changes aren't made through the automated pipeline, you may want to make rel ### Publish with a GitHub workflow -You can release to package managers by using [the 
`Publish PyPI` GitHub action](https://www.github.com/stainless-sdks/TogetherAI-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. +You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/togethercomputer/together-py/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. ### Publish manually diff --git a/README.md b/README.md index 5db7d518..e52c2070 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,10 @@ The REST API documentation can be found [on docs.together.ai](https://docs.toget ## Installation ```sh -# install from this staging repo -pip install git+ssh://git@github.com/stainless-sdks/TogetherAI-python.git +# install from PyPI +pip install --pre together ``` -> [!NOTE] -> Once this package is [published to PyPI](https://app.stainlessapi.com/docs/guides/publish), this will become: `pip install --pre together` - ## Usage The full API of this library can be found in [api.md](api.md). @@ -296,9 +293,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.choices) ``` -These methods return an [`APIResponse`](https://github.com/stainless-sdks/TogetherAI-python/tree/main/src/together/_response.py) object. +These methods return an [`APIResponse`](https://github.com/togethercomputer/together-py/tree/main/src/together/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/stainless-sdks/TogetherAI-python/tree/main/src/together/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/togethercomputer/together-py/tree/main/src/together/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` @@ -394,7 +391,7 @@ This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) con We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. -We are keen for your feedback; please open an [issue](https://www.github.com/stainless-sdks/TogetherAI-python/issues) with questions, bugs, or suggestions. +We are keen for your feedback; please open an [issue](https://www.github.com/togethercomputer/together-py/issues) with questions, bugs, or suggestions. ## Requirements diff --git a/bin/check-release-environment b/bin/check-release-environment new file mode 100644 index 00000000..8439dbde --- /dev/null +++ b/bin/check-release-environment @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +warnings=() +errors=() + +if [ -z "${PYPI_TOKEN}" ]; then + warnings+=("The TOGETHER_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") +fi + +lenWarnings=${#warnings[@]} + +if [[ lenWarnings -gt 0 ]]; then + echo -e "Found the following warnings in the release environment:\n" + + for warning in "${warnings[@]}"; do + echo -e "- $warning\n" + done +fi + +lenErrors=${#errors[@]} + +if [[ lenErrors -gt 0 ]]; then + echo -e "Found the following errors in the release environment:\n" + + for error in "${errors[@]}"; do + echo -e "- $error\n" + done + + exit 1 +fi + +echo "The environment is ready to push releases!" 
diff --git a/pyproject.toml b/pyproject.toml index 1ccfdf6b..b834a03d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,8 +39,8 @@ classifiers = [ [project.urls] -Homepage = "https://github.com/stainless-sdks/TogetherAI-python" -Repository = "https://github.com/stainless-sdks/TogetherAI-python" +Homepage = "https://github.com/togethercomputer/together-py" +Repository = "https://github.com/togethercomputer/together-py" @@ -108,7 +108,7 @@ path = "README.md" [[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] # replace relative links with absolute links pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' -replacement = '[\1](https://github.com/stainless-sdks/TogetherAI-python/tree/main/\g<2>)' +replacement = '[\1](https://github.com/togethercomputer/together-py/tree/main/\g<2>)' [tool.black] line-length = 120 diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 00000000..9dc714d7 --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,66 @@ +{ + "packages": { + ".": {} + }, + "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json", + "include-v-in-tag": true, + "include-component-in-tag": false, + "versioning": "prerelease", + "prerelease": true, + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "pull-request-header": "Automated Release PR", + "pull-request-title-pattern": "release: ${version}", + "changelog-sections": [ + { + "type": "feat", + "section": "Features" + }, + { + "type": "fix", + "section": "Bug Fixes" + }, + { + "type": "perf", + "section": "Performance Improvements" + }, + { + "type": "revert", + "section": "Reverts" + }, + { + "type": "chore", + "section": "Chores" + }, + { + "type": "docs", + "section": "Documentation" + }, + { + "type": "style", + "section": "Styles" + }, + { + "type": "refactor", + "section": "Refactors" + }, + { + "type": "test", + "section": "Tests", + "hidden": true + }, + { + "type": "build", + "section": "Build System" + }, + { + "type": "ci", + "section": "Continuous Integration", + "hidden": true + } + ], + "release-type": "python", + "extra-files": [ + "src/together/_version.py" + ] +} \ No newline at end of file diff --git a/src/together/_version.py b/src/together/_version.py index e8ad9cc7..e4f5dac8 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "together" -__version__ = "0.0.1-alpha.0" +__version__ = "0.0.1-alpha.0" # x-release-please-version From cd703fbdb178f4f05ffc43af0e86f5218537ce5c Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 23:26:13 +0000 Subject: [PATCH 02/10] feat(api): OpenAPI spec update via Stainless API (#3) --- .stats.yml | 2 +- src/together/_client.py | 4 +- src/together/resources/chat/completions.py | 46 +++++++++++-------- src/together/resources/completions.py | 28 +++++------ src/together/resources/embeddings.py | 4 +- src/together/resources/files.py | 16 +++---- src/together/resources/fine_tune.py | 24 +++++----- src/together/resources/images.py | 4 +- src/together/resources/models.py | 4 +- .../types/chat/completion_create_params.py | 8 ++-- .../types/completion_create_params.py | 2 +- tests/test_client.py | 4 +- 12 files changed, 77 insertions(+), 69 deletions(-) diff --git a/.stats.yml b/.stats.yml index d332e906..02655e1f 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-6e975518a2563fdb57394133f1ed9dfe426a2cf5d2fef793fd139627c93df4aa.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-5934359dd4fbab352cb5042ffbf08374bd3d3b6bc0550fd09797de44626772fe.yml diff --git a/src/together/_client.py b/src/together/_client.py index 9e4fd0c3..77e8d83a 100644 --- a/src/together/_client.py +++ b/src/together/_client.py @@ -131,7 +131,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": f"Bearer {api_key}"} + return {"Authorization": api_key} @property @override @@ -313,7 +313,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": f"Bearer {api_key}"} + return {"Authorization": api_key} @property @override diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index d45eac28..43125d39 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -50,7 +50,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -74,7 +74,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -114,7 +114,8 @@ def create( stop: A list of string sequences that will truncate (stop) inference text output. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. temperature: Determines the degree of randomness in the response. 
@@ -147,7 +148,7 @@ def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -170,7 +171,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Stream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -178,7 +179,8 @@ def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -243,7 +245,7 @@ def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -266,7 +268,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -274,7 +276,8 @@ def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -338,7 +341,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -414,7 +417,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -438,7 +441,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -478,7 +481,8 @@ async def create( stop: A list of string sequences that will truncate (stop) inference text output. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. temperature: Determines the degree of randomness in the response. 
@@ -511,7 +515,7 @@ async def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -534,7 +538,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncStream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -542,7 +546,8 @@ async def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -607,7 +612,7 @@ async def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -630,7 +635,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -638,7 +643,8 @@ async def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -702,7 +708,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index fb72ecb8..163fc7a5 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -48,7 +48,7 @@ def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -69,7 +69,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. 
@@ -136,7 +136,7 @@ def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -156,7 +156,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Stream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -223,7 +223,7 @@ def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -243,7 +243,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | Stream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -309,7 +309,7 @@ def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -379,7 +379,7 @@ async def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -400,7 +400,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -467,7 +467,7 @@ async def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -487,7 +487,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncStream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -554,7 +554,7 @@ async def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -574,7 +574,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | AsyncStream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. 
Args: model: The name of the model to query. @@ -640,7 +640,7 @@ async def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py index 75ccf4bd..290c574d 100644 --- a/src/together/resources/embeddings.py +++ b/src/together/resources/embeddings.py @@ -48,7 +48,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Embedding: """ - Creates an embedding vector representing the input text + Query an embedding model for a given string of text. Args: input: A string providing the text for the model to embed. @@ -101,7 +101,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Embedding: """ - Creates an embedding vector representing the input text + Query an embedding model for a given string of text. Args: input: A string providing the text for the model to embed. diff --git a/src/together/resources/files.py b/src/together/resources/files.py index ee055f07..90b95bf1 100644 --- a/src/together/resources/files.py +++ b/src/together/resources/files.py @@ -52,7 +52,7 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileRetrieveResponse: """ - Retrieve a file + List the metadata for a single uploaded data file. Args: extra_headers: Send extra headers @@ -83,7 +83,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileListResponse: - """List all files""" + """List the metadata for all uploaded data files.""" return self._get( "/files", options=make_request_options( @@ -104,7 +104,7 @@ def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileDeleteResponse: """ - Delete a file + Delete a previously uploaded data file. Args: extra_headers: Send extra headers @@ -137,7 +137,7 @@ def content( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BinaryAPIResponse: """ - Retrieve file content + Get the contents of a single uploaded data file. Args: extra_headers: Send extra headers @@ -181,7 +181,7 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileRetrieveResponse: """ - Retrieve a file + List the metadata for a single uploaded data file. Args: extra_headers: Send extra headers @@ -212,7 +212,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileListResponse: - """List all files""" + """List the metadata for all uploaded data files.""" return await self._get( "/files", options=make_request_options( @@ -233,7 +233,7 @@ async def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileDeleteResponse: """ - Delete a file + Delete a previously uploaded data file. Args: extra_headers: Send extra headers @@ -266,7 +266,7 @@ async def content( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncBinaryAPIResponse: """ - Retrieve file content + Get the contents of a single uploaded data file. 
Args: extra_headers: Send extra headers diff --git a/src/together/resources/fine_tune.py b/src/together/resources/fine_tune.py index e3aab276..c75ac6ae 100644 --- a/src/together/resources/fine_tune.py +++ b/src/together/resources/fine_tune.py @@ -57,7 +57,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Create a fine-tuning job + Use a model to create a fine-tuning job. Args: model: Name of the base model to run fine-tune job on @@ -117,7 +117,7 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Retrieve fine-tune job details + List the metadata for a single fine-tuning job. Args: extra_headers: Send extra headers @@ -148,7 +148,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneListResponse: - """List fine-tune job history""" + """List the metadata for all fine-tuning jobs.""" return self._get( "/fine-tunes", options=make_request_options( @@ -169,7 +169,7 @@ def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Cancels a running fine-tuning job. + Cancel a currently running fine-tuning job. Args: extra_headers: Send extra headers @@ -204,7 +204,7 @@ def download( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneDownloadResponse: """ - Downloads a compressed fine-tuned model or checkpoint to local disk. + Download a compressed fine-tuned model or checkpoint to local disk. Args: ft_id: Fine-tune ID to download. A string that starts with `ft-`. @@ -253,7 +253,7 @@ def list_events( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneEvent: """ - List events of a fine-tune job + List the events for a single fine-tuning job. Args: extra_headers: Send extra headers @@ -303,7 +303,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Create a fine-tuning job + Use a model to create a fine-tuning job. Args: model: Name of the base model to run fine-tune job on @@ -363,7 +363,7 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Retrieve fine-tune job details + List the metadata for a single fine-tuning job. Args: extra_headers: Send extra headers @@ -394,7 +394,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneListResponse: - """List fine-tune job history""" + """List the metadata for all fine-tuning jobs.""" return await self._get( "/fine-tunes", options=make_request_options( @@ -415,7 +415,7 @@ async def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Cancels a running fine-tuning job. + Cancel a currently running fine-tuning job. Args: extra_headers: Send extra headers @@ -450,7 +450,7 @@ async def download( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneDownloadResponse: """ - Downloads a compressed fine-tuned model or checkpoint to local disk. + Download a compressed fine-tuned model or checkpoint to local disk. Args: ft_id: Fine-tune ID to download. A string that starts with `ft-`. @@ -499,7 +499,7 @@ async def list_events( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneEvent: """ - List events of a fine-tune job + List the events for a single fine-tuning job. 
Args: extra_headers: Send extra headers diff --git a/src/together/resources/images.py b/src/together/resources/images.py index d57d1240..ec9156c6 100644 --- a/src/together/resources/images.py +++ b/src/together/resources/images.py @@ -54,7 +54,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImageFile: """ - Generate images based on a given prompt using a specified model + Use an image model to generate an image for a given prompt. Args: model: The model to use for image generation. @@ -131,7 +131,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImageFile: """ - Generate images based on a given prompt using a specified model + Use an image model to generate an image for a given prompt. Args: model: The model to use for image generation. diff --git a/src/together/resources/models.py b/src/together/resources/models.py index edb727fc..da92d7b9 100644 --- a/src/together/resources/models.py +++ b/src/together/resources/models.py @@ -40,7 +40,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: - """Lists all the available models""" + """Lists all of Together's open-source models""" return self._get( "/models", options=make_request_options( @@ -69,7 +69,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: - """Lists all the available models""" + """Lists all of Together's open-source models""" return await self._get( "/models", options=make_request_options( diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index d1e404b9..05c6a86a 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -38,7 +38,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): mentioned prior. """ - logit_bias: Dict[str, object] + logit_bias: Dict[str, float] """ The `logit_bias` parameter allows us to adjust the likelihood of specific tokens appearing in the generated output. @@ -128,7 +128,8 @@ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): stream: Literal[False] """If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. """ @@ -136,7 +137,8 @@ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] """If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. """ diff --git a/src/together/types/completion_create_params.py b/src/together/types/completion_create_params.py index f2cb084e..7f4e1fef 100644 --- a/src/together/types/completion_create_params.py +++ b/src/together/types/completion_create_params.py @@ -28,7 +28,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): mentioned prior. """ - logit_bias: Dict[str, object] + logit_bias: Dict[str, float] """ The `logit_bias` parameter allows us to adjust the likelihood of specific tokens appearing in the generated output. 
diff --git a/tests/test_client.py b/tests/test_client.py index b09a8db0..2e3679a9 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -332,7 +332,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = Together(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert request.headers.get("Authorization") == f"Bearer {api_key}" + assert request.headers.get("Authorization") == api_key with pytest.raises(TogetherError): client2 = Together(base_url=base_url, api_key=None, _strict_response_validation=True) @@ -1048,7 +1048,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = AsyncTogether(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert request.headers.get("Authorization") == f"Bearer {api_key}" + assert request.headers.get("Authorization") == api_key with pytest.raises(TogetherError): client2 = AsyncTogether(base_url=base_url, api_key=None, _strict_response_validation=True) From 00ef6cc33f844ef3d214e805f3bdfa28240905b7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:28:55 +0000 Subject: [PATCH 03/10] feat(api): OpenAPI spec update via Stainless API (#4) --- .stats.yml | 2 +- src/together/_base_client.py | 25 +- src/together/_utils/__init__.py | 1 + src/together/_utils/_reflection.py | 8 + src/together/_utils/_sync.py | 19 +- src/together/resources/chat/completions.py | 390 +++++++++++------- src/together/resources/completions.py | 338 +++++++++------ .../types/chat/completion_create_params.py | 92 +++-- .../types/completion_create_params.py | 76 ++-- 9 files changed, 585 insertions(+), 366 deletions(-) create mode 100644 src/together/_utils/_reflection.py diff --git a/.stats.yml b/.stats.yml index 02655e1f..d6da9ca3 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-5934359dd4fbab352cb5042ffbf08374bd3d3b6bc0550fd09797de44626772fe.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-33661dd8fd4c26ecd595dee22e2c9274e6c4699ad8de5ece233e0d37376c6b7c.yml diff --git a/src/together/_base_client.py b/src/together/_base_client.py index 66db09be..a806809b 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -60,7 +60,7 @@ RequestOptions, ModelBuilderProtocol, ) -from ._utils import is_dict, is_list, is_given, lru_cache, is_mapping +from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping from ._compat import model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( @@ -358,6 +358,7 @@ def __init__( self._custom_query = custom_query or {} self._strict_response_validation = _strict_response_validation self._idempotency_header = None + self._platform: Platform | None = None if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] raise TypeError( @@ -456,7 +457,7 @@ def _build_request( raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") headers = self._build_headers(options) - params = _merge_mappings(self._custom_query, options.params) + params 
= _merge_mappings(self.default_query, options.params) content_type = headers.get("Content-Type") # If the given Content-Type header is multipart/form-data then it @@ -592,6 +593,12 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @property + def default_query(self) -> dict[str, object]: + return { + **self._custom_query, + } + def _validate_headers( self, headers: Headers, # noqa: ARG002 @@ -616,7 +623,10 @@ def base_url(self, url: URL | str) -> None: self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url)) def platform_headers(self) -> Dict[str, str]: - return platform_headers(self._version) + # the actual implementation is in a separate `lru_cache` decorated + # function because adding `lru_cache` to methods will leak memory + # https://github.com/python/cpython/issues/88476 + return platform_headers(self._version, platform=self._platform) def _parse_retry_after_header(self, response_headers: Optional[httpx.Headers] = None) -> float | None: """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified. @@ -1492,6 +1502,11 @@ async def _request( stream_cls: type[_AsyncStreamT] | None, remaining_retries: int | None, ) -> ResponseT | _AsyncStreamT: + if self._platform is None: + # `get_platform` can make blocking IO calls so we + # execute it earlier while we are in an async context + self._platform = await asyncify(get_platform)() + cast_to = self._maybe_override_cast_to(cast_to, options) await self._prepare_options(options) @@ -1915,11 +1930,11 @@ def get_platform() -> Platform: @lru_cache(maxsize=None) -def platform_headers(version: str) -> Dict[str, str]: +def platform_headers(version: str, *, platform: Platform | None) -> Dict[str, str]: return { "X-Stainless-Lang": "python", "X-Stainless-Package-Version": version, - "X-Stainless-OS": str(get_platform()), + "X-Stainless-OS": str(platform or get_platform()), "X-Stainless-Arch": str(get_architecture()), "X-Stainless-Runtime": get_python_runtime(), "X-Stainless-Runtime-Version": get_python_version(), diff --git a/src/together/_utils/__init__.py b/src/together/_utils/__init__.py index 31b5b227..667e2473 100644 --- a/src/together/_utils/__init__.py +++ b/src/together/_utils/__init__.py @@ -49,3 +49,4 @@ maybe_transform as maybe_transform, async_maybe_transform as async_maybe_transform, ) +from ._reflection import function_has_argument as function_has_argument diff --git a/src/together/_utils/_reflection.py b/src/together/_utils/_reflection.py new file mode 100644 index 00000000..e134f58e --- /dev/null +++ b/src/together/_utils/_reflection.py @@ -0,0 +1,8 @@ +import inspect +from typing import Any, Callable + + +def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool: + """Returns whether or not the given function has a specific parameter""" + sig = inspect.signature(func) + return arg_name in sig.parameters diff --git a/src/together/_utils/_sync.py b/src/together/_utils/_sync.py index 595924e5..d0d81033 100644 --- a/src/together/_utils/_sync.py +++ b/src/together/_utils/_sync.py @@ -7,6 +7,8 @@ import anyio import anyio.to_thread +from ._reflection import function_has_argument + T_Retval = TypeVar("T_Retval") T_ParamSpec = ParamSpec("T_ParamSpec") @@ -59,6 +61,21 @@ def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: partial_f = functools.partial(function, *args, **kwargs) - return await anyio.to_thread.run_sync(partial_f, 
cancellable=cancellable, limiter=limiter) + + # In `v4.1.0` anyio added the `abandon_on_cancel` argument and deprecated the old + # `cancellable` argument, so we need to use the new `abandon_on_cancel` to avoid + # surfacing deprecation warnings. + if function_has_argument(anyio.to_thread.run_sync, "abandon_on_cancel"): + return await anyio.to_thread.run_sync( + partial_f, + abandon_on_cancel=cancellable, + limiter=limiter, + ) + + return await anyio.to_thread.run_sync( + partial_f, + cancellable=cancellable, + limiter=limiter, + ) return wrapper diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index 43125d39..0d69a7c8 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -81,53 +81,66 @@ def create( model: The name of the model to query. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. 
+ stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -178,53 +191,66 @@ def create( model: The name of the model to query. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. 
max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -275,53 +301,66 @@ def create( model: The name of the model to query. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. 
+ stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. 
+ top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -448,53 +487,66 @@ async def create( model: The name of the model to query. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. 
If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -545,53 +597,66 @@ async def create( model: The name of the model to query. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. 
logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -642,53 +707,66 @@ async def create( model: The name of the model to query. 
- stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. 
Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index 163fc7a5..3f777147 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -76,46 +76,57 @@ def create( prompt: A string providing context for the model to complete. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. 
+ stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -163,46 +174,57 @@ def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. 
+ min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -250,46 +272,57 @@ def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. 
+ frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -407,46 +440,57 @@ async def create( prompt: A string providing context for the model to complete. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. 
- frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. 
+ top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -494,46 +538,57 @@ async def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. 
- top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -581,46 +636,57 @@ async def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. - - stop: A list of string sequences that will truncate (stop) inference text output. - - temperature: Determines the degree of randomness in the response. 
- - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. - - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). + + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. + + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. + + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. + + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index 05c6a86a..8b57ef5b 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -26,46 +26,39 @@ class CompletionCreateParamsBase(TypedDict, total=False): """The name of the model to query.""" echo: bool - """ - If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + """If true, the response will contain the prompt. + + Can be used with `logprobs` to return prompt logprobs. """ frequency_penalty: float """ - The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. """ logit_bias: Dict[str, float] - """ - The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. - """ + """Adjusts the likelihood of specific tokens appearing in the generated output.""" logprobs: int """ Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. """ max_tokens: int """The maximum number of tokens to generate.""" min_p: float - """ - The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. 
- """ + """A number between 0 and 1 that can be used as an alternative to temperature.""" n: int - """Number of generations to return""" + """The number of completions to generate for each prompt.""" presence_penalty: float """ - The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. """ repetition_penalty: float @@ -75,33 +68,60 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ response_format: ResponseFormat - """Specifies the format of the response.""" + """An object specifying the format that the model must output.""" safety_model: str - """The name of the safety model to use.""" + """The name of the moderation model used to validate tokens. + + Choose from the available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). + """ stop: List[str] - """A list of string sequences that will truncate (stop) inference text output.""" + """A list of string sequences that will truncate (stop) inference text output. + + For example, "" will stop generation as soon as the model generates the + given token. + """ temperature: float - """Determines the degree of randomness in the response.""" + """ + A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. + """ tool_choice: ToolChoice - """The choice of tool to use.""" + """Controls which (if any) function is called by the model. + + By default uses `auto`, which lets the model pick between generating a message + or calling a function. + """ tools: Iterable[ToolsParam] - """A list of tools to be used in the query.""" + """A list of tools the model may call. + + Currently, only functions are supported as a tool. Use this to provide a list of + functions the model may generate JSON inputs for. + """ top_k: int """ - The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. """ top_p: float """ - The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. """ @@ -126,19 +146,19 @@ class ResponseFormat(TypedDict, total=False): class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): stream: Literal[False] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. 
+ """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. """ diff --git a/src/together/types/completion_create_params.py b/src/together/types/completion_create_params.py index 7f4e1fef..050a5477 100644 --- a/src/together/types/completion_create_params.py +++ b/src/together/types/completion_create_params.py @@ -16,46 +16,39 @@ class CompletionCreateParamsBase(TypedDict, total=False): """A string providing context for the model to complete.""" echo: bool - """ - If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + """If true, the response will contain the prompt. + + Can be used with `logprobs` to return prompt logprobs. """ frequency_penalty: float """ - The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. """ logit_bias: Dict[str, float] - """ - The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. - """ + """Adjusts the likelihood of specific tokens appearing in the generated output.""" logprobs: int """ Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. """ max_tokens: int """The maximum number of tokens to generate.""" min_p: float - """ - The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. - """ + """A number between 0 and 1 that can be used as an alternative to temperature.""" n: int - """Number of generations to return""" + """The number of completions to generate for each prompt.""" presence_penalty: float """ - The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. """ repetition_penalty: float @@ -65,40 +58,61 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ safety_model: str - """The name of the safety model to use.""" + """The name of the moderation model used to validate tokens. + + Choose from the available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). + """ stop: List[str] - """A list of string sequences that will truncate (stop) inference text output.""" + """A list of string sequences that will truncate (stop) inference text output. + + For example, "" will stop generation as soon as the model generates the + given token. 
+ """ temperature: float - """Determines the degree of randomness in the response.""" + """ + A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. + """ top_k: int """ - The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. """ top_p: float """ - The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. """ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): stream: Literal[False] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]` + """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]` + """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. 
""" From 3e9827b08f2698029e31df3d770d7f873b9d610d Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 26 Jun 2024 17:26:31 +0000 Subject: [PATCH 04/10] feat(api): OpenAPI spec update via Stainless API (#5) --- .stats.yml | 2 +- README.md | 2 +- bin/publish-pypi | 3 ++ src/together/resources/chat/completions.py | 10 ++++++ src/together/types/__init__.py | 1 + src/together/types/chat/chat_completion.py | 33 ++++++++++++++----- .../types/chat/chat_completion_chunk.py | 32 +++++++++++------- .../types/chat/completion_create_params.py | 11 +++++++ src/together/types/completion.py | 2 +- src/together/types/fine_tune_event.py | 3 +- src/together/types/log_probs.py | 10 +++++- src/together/types/tool_choice.py | 23 +++++++++++++ src/together/types/tool_choice_param.py | 14 +++++--- tests/api_resources/chat/test_completions.py | 4 +++ 14 files changed, 121 insertions(+), 29 deletions(-) create mode 100644 src/together/types/tool_choice.py diff --git a/.stats.yml b/.stats.yml index d6da9ca3..cf0d3ce7 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-33661dd8fd4c26ecd595dee22e2c9274e6c4699ad8de5ece233e0d37376c6b7c.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-e8f4e11a2e3927c75dce42c913ef5c9adcf2aef3d3b1312b4825d9f135413c39.yml diff --git a/README.md b/README.md index e52c2070..70154594 100644 --- a/README.md +++ b/README.md @@ -362,7 +362,7 @@ You can directly override the [httpx client](https://www.python-httpx.org/api/#c - Support for proxies - Custom transports -- Additional [advanced](https://www.python-httpx.org/advanced/#client-instances) functionality +- Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python from together import Together, DefaultHttpxClient diff --git a/bin/publish-pypi b/bin/publish-pypi index 826054e9..05bfccbb 100644 --- a/bin/publish-pypi +++ b/bin/publish-pypi @@ -3,4 +3,7 @@ set -eux mkdir -p dist rye build --clean +# Patching importlib-metadata version until upstream library version is updated +# https://github.com/pypa/twine/issues/977#issuecomment-2189800841 +"$HOME/.rye/self/bin/python3" -m pip install 'importlib-metadata==7.2.1' rye publish --yes --token=$PYPI_TOKEN diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index 0d69a7c8..5d698125 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -50,6 +50,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -161,6 +162,7 @@ def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -271,6 +273,7 @@ def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, 
logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -380,6 +383,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -411,6 +415,7 @@ def create( "model": model, "echo": echo, "frequency_penalty": frequency_penalty, + "function_call": function_call, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, @@ -456,6 +461,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -567,6 +573,7 @@ async def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -677,6 +684,7 @@ async def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -786,6 +794,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -817,6 +826,7 @@ async def create( "model": model, "echo": echo, "frequency_penalty": frequency_penalty, + "function_call": function_call, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py index ab0e2f56..cc25b8a0 100644 --- a/src/together/types/__init__.py +++ b/src/together/types/__init__.py @@ -7,6 +7,7 @@ from .log_probs import LogProbs as LogProbs from .completion import Completion as Completion from .image_file import ImageFile as ImageFile +from .tool_choice import ToolChoice as ToolChoice from .tools_param import ToolsParam as ToolsParam from .fine_tune_event import FineTuneEvent as FineTuneEvent from .tool_choice_param import ToolChoiceParam as ToolChoiceParam diff --git a/src/together/types/chat/chat_completion.py b/src/together/types/chat/chat_completion.py index 19fe4a27..ebe6722a 100644 --- a/src/together/types/chat/chat_completion.py +++ b/src/together/types/chat/chat_completion.py @@ -5,34 +5,51 @@ from ..._models import BaseModel from ..log_probs import LogProbs +from ..tool_choice import ToolChoice from .chat_completion_usage import ChatCompletionUsage -__all__ = ["ChatCompletion", "Choice", "ChoiceMessage"] +__all__ = ["ChatCompletion", "Choice", "ChoiceMessage", "ChoiceMessageFunctionCall"] + + +class ChoiceMessageFunctionCall(BaseModel): + arguments: str + + name: str class ChoiceMessage(BaseModel): content: Optional[str] = None - role: 
Optional[str] = None + role: Literal["assistant"] + + function_call: Optional[ChoiceMessageFunctionCall] = None + + tool_calls: Optional[List[ToolChoice]] = None class Choice(BaseModel): - finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls"]] = None + finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None + + index: Optional[int] = None logprobs: Optional[LogProbs] = None message: Optional[ChoiceMessage] = None + seed: Optional[int] = None + + text: Optional[str] = None + class ChatCompletion(BaseModel): - id: Optional[str] = None + id: str - choices: Optional[List[Choice]] = None + choices: List[Choice] - created: Optional[int] = None + created: int - model: Optional[str] = None + model: str - object: Optional[Literal["chat.completion"]] = None + object: Literal["chat.completion"] usage: Optional[ChatCompletionUsage] = None diff --git a/src/together/types/chat/chat_completion_chunk.py b/src/together/types/chat/chat_completion_chunk.py index df42bc2c..5107833d 100644 --- a/src/together/types/chat/chat_completion_chunk.py +++ b/src/together/types/chat/chat_completion_chunk.py @@ -4,42 +4,52 @@ from typing_extensions import Literal from ..._models import BaseModel +from ..log_probs import LogProbs +from ..tool_choice import ToolChoice from .chat_completion_usage import ChatCompletionUsage -__all__ = ["ChatCompletionChunk", "Token", "Choice", "ChoiceDelta"] +__all__ = ["ChatCompletionChunk", "Choice", "ChoiceDelta", "ChoiceDeltaFunctionCall"] -class Token(BaseModel): - id: int +class ChoiceDeltaFunctionCall(BaseModel): + arguments: str - logprob: float + name: str - special: bool - text: str +class ChoiceDelta(BaseModel): + content: Optional[str] = None + function_call: Optional[ChoiceDeltaFunctionCall] = None -class ChoiceDelta(BaseModel): - content: str + role: Optional[Literal["system", "user", "assistant", "function", "tool"]] = None + + token_id: Optional[int] = None + + tool_calls: Optional[List[ToolChoice]] = None class Choice(BaseModel): delta: ChoiceDelta + finish_reason: Literal["stop", "eos", "length", "tool_calls", "function_call"] + index: int + logprobs: Optional[LogProbs] = None + class ChatCompletionChunk(BaseModel): id: str - token: Token - choices: List[Choice] created: int + model: str + object: Literal["chat.completion.chunk"] - finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls"]] = None + system_fingerprint: Optional[str] = None usage: Optional[ChatCompletionUsage] = None diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index 8b57ef5b..99963d95 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -11,6 +11,8 @@ __all__ = [ "CompletionCreateParamsBase", "Message", + "FunctionCall", + "FunctionCallName", "ResponseFormat", "ToolChoice", "CompletionCreateParamsNonStreaming", @@ -37,6 +39,8 @@ class CompletionCreateParamsBase(TypedDict, total=False): repeating tokens that have already been mentioned. """ + function_call: FunctionCall + logit_bias: Dict[str, float] """Adjusts the likelihood of specific tokens appearing in the generated output.""" @@ -133,6 +137,13 @@ class Message(TypedDict, total=False): """The role of the messages author. 
Choice between: system, user, or assistant.""" +class FunctionCallName(TypedDict, total=False): + name: Required[str] + + +FunctionCall = Union[Literal["none", "auto"], FunctionCallName] + + class ResponseFormat(TypedDict, total=False): schema: Dict[str, str] """The schema of the response format.""" diff --git a/src/together/types/completion.py b/src/together/types/completion.py index b1ea84db..98ce814f 100644 --- a/src/together/types/completion.py +++ b/src/together/types/completion.py @@ -11,7 +11,7 @@ class Choice(BaseModel): - finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls"]] = None + finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None logprobs: Optional[LogProbs] = None diff --git a/src/together/types/fine_tune_event.py b/src/together/types/fine_tune_event.py index 09d6e795..bc98905f 100644 --- a/src/together/types/fine_tune_event.py +++ b/src/together/types/fine_tune_event.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins from typing import List, Optional from typing_extensions import Literal @@ -64,7 +63,7 @@ class Data(BaseModel): wandb_url: str - level: Optional[builtins.object] = None + level: Optional[Literal["info", "warning", "error", "legacy_info", "legacy_iwarning", "legacy_ierror"]] = None class FineTuneEvent(BaseModel): diff --git a/src/together/types/log_probs.py b/src/together/types/log_probs.py index a67d3607..a9c408a0 100644 --- a/src/together/types/log_probs.py +++ b/src/together/types/log_probs.py @@ -4,10 +4,18 @@ from .._models import BaseModel -__all__ = ["LogProbs"] +__all__ = ["LogProbs", "Content"] + + +class Content(BaseModel): + token: str + + logprob: float class LogProbs(BaseModel): + content: Optional[List[Content]] = None + token_logprobs: Optional[List[float]] = None """List of token log probabilities""" diff --git a/src/together/types/tool_choice.py b/src/together/types/tool_choice.py new file mode 100644 index 00000000..d48c79c6 --- /dev/null +++ b/src/together/types/tool_choice.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ToolChoice", "Function"] + + +class Function(BaseModel): + arguments: str + + name: str + + +class ToolChoice(BaseModel): + id: str + + function: Function + + index: float + + type: Literal["function"] diff --git a/src/together/types/tool_choice_param.py b/src/together/types/tool_choice_param.py index 241b8471..98b759fe 100644 --- a/src/together/types/tool_choice_param.py +++ b/src/together/types/tool_choice_param.py @@ -2,16 +2,22 @@ from __future__ import annotations -from typing_extensions import TypedDict +from typing_extensions import Literal, Required, TypedDict __all__ = ["ToolChoiceParam", "Function"] class Function(TypedDict, total=False): - name: str + arguments: Required[str] + + name: Required[str] class ToolChoiceParam(TypedDict, total=False): - function: Function + id: Required[str] + + function: Required[Function] + + index: Required[float] - type: str + type: Required[Literal["function"]] diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index f35a3e90..28a2cc9b 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -58,6 +58,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non model="mistralai/Mixtral-8x7B-Instruct-v0.1", echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, @@ -203,6 +204,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non stream=True, echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, @@ -350,6 +352,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn model="mistralai/Mixtral-8x7B-Instruct-v0.1", echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, @@ -495,6 +498,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn stream=True, echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, From a25a797f7f7d473ff3f2a939179e6576ec02f891 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 14:04:43 +0000 Subject: [PATCH 05/10] feat(api): manual updates (#6) --- .gitignore | 1 + README.md | 2 +- pyproject.toml | 16 ++++++++++++++ requirements-dev.lock | 10 ++++++++- requirements.lock | 3 ++- src/together/_client.py | 4 ++-- src/together/_models.py | 27 ++++++++++++++++++++++++ src/together/_utils/__init__.py | 5 ++++- src/together/_utils/_reflection.py | 34 ++++++++++++++++++++++++++++++ tests/test_client.py | 4 ++-- 10 files changed, 98 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0f9a66a9..87797408 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.prism.log .vscode _dev diff --git a/README.md b/README.md index 70154594..9aba58bb 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ It is generated with [Stainless](https://www.stainlessapi.com/). ## Documentation -The REST API documentation can be found [on docs.together.ai](https://docs.together.ai/). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found [on docs.together.ai](https://docs.together.ai). The full API of this library can be found in [api.md](api.md). 
## Installation diff --git a/pyproject.toml b/pyproject.toml index b834a03d..3d54bfb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ dev-dependencies = [ "nox", "dirty-equals>=0.6.0", "importlib-metadata>=6.7.0", + "rich>=13.7.1", ] @@ -99,6 +100,21 @@ include = [ [tool.hatch.build.targets.wheel] packages = ["src/together"] +[tool.hatch.build.targets.sdist] +# Basically everything except hidden files/directories (such as .github, .devcontainers, .python-version, etc) +include = [ + "/*.toml", + "/*.json", + "/*.lock", + "/*.md", + "/mypy.ini", + "/noxfile.py", + "bin/*", + "examples/*", + "src/*", + "tests/*", +] + [tool.hatch.metadata.hooks.fancy-pypi-readme] content-type = "text/markdown" diff --git a/requirements-dev.lock b/requirements-dev.lock index 0c449dc6..826c91cb 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -10,7 +10,7 @@ -e file:. annotated-types==0.6.0 # via pydantic -anyio==4.1.0 +anyio==4.4.0 # via httpx # via together argcomplete==3.1.2 @@ -44,6 +44,10 @@ idna==3.4 importlib-metadata==7.0.0 iniconfig==2.0.0 # via pytest +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py mypy==1.7.1 mypy-extensions==1.0.0 # via mypy @@ -63,6 +67,8 @@ pydantic==2.7.1 # via together pydantic-core==2.18.2 # via pydantic +pygments==2.18.0 + # via rich pyright==1.1.364 pytest==7.1.1 # via pytest-asyncio @@ -72,6 +78,7 @@ python-dateutil==2.8.2 pytz==2023.3.post1 # via dirty-equals respx==0.20.2 +rich==13.7.1 ruff==0.1.9 setuptools==68.2.2 # via nodeenv @@ -86,6 +93,7 @@ tomli==2.0.1 # via mypy # via pytest typing-extensions==4.8.0 + # via anyio # via mypy # via pydantic # via pydantic-core diff --git a/requirements.lock b/requirements.lock index d2e2cf38..a6341f0f 100644 --- a/requirements.lock +++ b/requirements.lock @@ -10,7 +10,7 @@ -e file:. annotated-types==0.6.0 # via pydantic -anyio==4.1.0 +anyio==4.4.0 # via httpx # via together certifi==2023.7.22 @@ -38,6 +38,7 @@ sniffio==1.3.0 # via httpx # via together typing-extensions==4.8.0 + # via anyio # via pydantic # via pydantic-core # via together diff --git a/src/together/_client.py b/src/together/_client.py index 77e8d83a..9e4fd0c3 100644 --- a/src/together/_client.py +++ b/src/together/_client.py @@ -131,7 +131,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": api_key} + return {"Authorization": f"Bearer {api_key}"} @property @override @@ -313,7 +313,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": api_key} + return {"Authorization": f"Bearer {api_key}"} @property @override diff --git a/src/together/_models.py b/src/together/_models.py index 75c68cc7..5d95bb4b 100644 --- a/src/together/_models.py +++ b/src/together/_models.py @@ -10,6 +10,7 @@ ClassVar, Protocol, Required, + ParamSpec, TypedDict, TypeGuard, final, @@ -67,6 +68,9 @@ __all__ = ["BaseModel", "GenericModel"] _T = TypeVar("_T") +_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel") + +P = ParamSpec("P") @runtime_checkable @@ -379,6 +383,29 @@ def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericMo return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) +def build( + base_model_cls: Callable[P, _BaseModelT], + *args: P.args, + **kwargs: P.kwargs, +) -> _BaseModelT: + """Construct a BaseModel class without validation. 
+ + This is useful for cases where you need to instantiate a `BaseModel` + from an API response as this provides type-safe params which isn't supported + by helpers like `construct_type()`. + + ```py + build(MyModel, my_field_a="foo", my_field_b=123) + ``` + """ + if args: + raise TypeError( + "Received positional arguments which are not supported; Keyword arguments must be used instead", + ) + + return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs)) + + def construct_type(*, value: object, type_: object) -> object: """Loose coercion to the expected type with construction of nested values. diff --git a/src/together/_utils/__init__.py b/src/together/_utils/__init__.py index 667e2473..3efe66c8 100644 --- a/src/together/_utils/__init__.py +++ b/src/together/_utils/__init__.py @@ -49,4 +49,7 @@ maybe_transform as maybe_transform, async_maybe_transform as async_maybe_transform, ) -from ._reflection import function_has_argument as function_has_argument +from ._reflection import ( + function_has_argument as function_has_argument, + assert_signatures_in_sync as assert_signatures_in_sync, +) diff --git a/src/together/_utils/_reflection.py b/src/together/_utils/_reflection.py index e134f58e..9a53c7bd 100644 --- a/src/together/_utils/_reflection.py +++ b/src/together/_utils/_reflection.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import inspect from typing import Any, Callable @@ -6,3 +8,35 @@ def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool: """Returns whether or not the given function has a specific parameter""" sig = inspect.signature(func) return arg_name in sig.parameters + + +def assert_signatures_in_sync( + source_func: Callable[..., Any], + check_func: Callable[..., Any], + *, + exclude_params: set[str] = set(), +) -> None: + """Ensure that the signature of the second function matches the first.""" + + check_sig = inspect.signature(check_func) + source_sig = inspect.signature(source_func) + + errors: list[str] = [] + + for name, source_param in source_sig.parameters.items(): + if name in exclude_params: + continue + + custom_param = check_sig.parameters.get(name) + if not custom_param: + errors.append(f"the `{name}` param is missing") + continue + + if custom_param.annotation != source_param.annotation: + errors.append( + f"types for the `{name}` param do not match; source={repr(source_param.annotation)} checking={repr(custom_param.annotation)}" + ) + continue + + if errors: + raise AssertionError(f"{len(errors)} errors encountered when comparing signatures:\n\n" + "\n\n".join(errors)) diff --git a/tests/test_client.py b/tests/test_client.py index 2e3679a9..b09a8db0 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -332,7 +332,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = Together(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert request.headers.get("Authorization") == api_key + assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(TogetherError): client2 = Together(base_url=base_url, api_key=None, _strict_response_validation=True) @@ -1048,7 +1048,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = AsyncTogether(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert 
request.headers.get("Authorization") == api_key + assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(TogetherError): client2 = AsyncTogether(base_url=base_url, api_key=None, _strict_response_validation=True) From 6bab9dadd17cacd94565c8f4df25c0ea6f83e987 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 23:44:47 +0000 Subject: [PATCH 06/10] feat(api): OpenAPI spec update via Stainless API (#7) --- .devcontainer/Dockerfile | 2 +- .github/workflows/ci.yml | 4 ++-- .github/workflows/publish-pypi.yml | 4 ++-- .stats.yml | 2 +- requirements-dev.lock | 1 + requirements.lock | 1 + src/together/_base_client.py | 20 +++++++++++++++++-- src/together/resources/embeddings.py | 6 ++++-- src/together/types/embedding_create_params.py | 3 ++- 9 files changed, 32 insertions(+), 11 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 83bca8f7..ac9a2e75 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.24.0" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c339440..257f0561 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.35.0' RYE_INSTALL_OPTION: '--yes' - name: Install dependencies @@ -41,7 +41,7 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.35.0' RYE_INSTALL_OPTION: '--yes' - name: Bootstrap diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 632c0e94..fb499434 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -21,8 +21,8 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.35.0' + RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI run: | diff --git a/.stats.yml b/.stats.yml index cf0d3ce7..d40eba14 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-e8f4e11a2e3927c75dce42c913ef5c9adcf2aef3d3b1312b4825d9f135413c39.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9ca35fd97a4194757393da2b0049a2e9900fd837a69afecfe01568b83796e299.yml diff --git a/requirements-dev.lock b/requirements-dev.lock index 826c91cb..63621c40 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,6 +6,7 @@ # features: [] # all-features: true # with-sources: false +# generate-hashes: false -e file:. annotated-types==0.6.0 diff --git a/requirements.lock b/requirements.lock index a6341f0f..5fd6ad03 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,6 +6,7 @@ # features: [] # all-features: true # with-sources: false +# generate-hashes: false -e file:. 
annotated-types==0.6.0 diff --git a/src/together/_base_client.py b/src/together/_base_client.py index a806809b..0f9eb67c 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -58,6 +58,7 @@ HttpxSendArgs, AsyncTransport, RequestOptions, + HttpxRequestFiles, ModelBuilderProtocol, ) from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping @@ -459,6 +460,7 @@ def _build_request( headers = self._build_headers(options) params = _merge_mappings(self.default_query, options.params) content_type = headers.get("Content-Type") + files = options.files # If the given Content-Type header is multipart/form-data then it # has to be removed so that httpx can generate the header with @@ -472,7 +474,7 @@ def _build_request( headers.pop("Content-Type") # As we are now sending multipart/form-data instead of application/json - # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding + # we need to tell httpx to use it, https://www.python-httpx.org/advanced/clients/#multipart-file-encoding if json_data: if not is_dict(json_data): raise TypeError( @@ -480,6 +482,15 @@ def _build_request( ) kwargs["data"] = self._serialize_multipartform(json_data) + # httpx determines whether or not to send a "multipart/form-data" + # request based on the truthiness of the "files" argument. + # This gets around that issue by generating a dict value that + # evaluates to true. + # + # https://github.com/encode/httpx/discussions/2399#discussioncomment-3814186 + if not files: + files = cast(HttpxRequestFiles, ForceMultipartDict()) + # TODO: report this error to httpx return self._client.build_request( # pyright: ignore[reportUnknownMemberType] headers=headers, @@ -492,7 +503,7 @@ def _build_request( # https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, json=json_data, - files=options.files, + files=files, **kwargs, ) @@ -1863,6 +1874,11 @@ def make_request_options( return options +class ForceMultipartDict(Dict[str, None]): + def __bool__(self) -> bool: + return True + + class OtherPlatform: def __init__(self, name: str) -> None: self.name = name diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py index 290c574d..d1fda42c 100644 --- a/src/together/resources/embeddings.py +++ b/src/together/resources/embeddings.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import List, Union + import httpx from ..types import embedding_create_params @@ -38,7 +40,7 @@ def with_streaming_response(self) -> EmbeddingsResourceWithStreamingResponse: def create( self, *, - input: str, + input: Union[str, List[str]], model: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -91,7 +93,7 @@ def with_streaming_response(self) -> AsyncEmbeddingsResourceWithStreamingRespons async def create( self, *, - input: str, + input: Union[str, List[str]], model: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
diff --git a/src/together/types/embedding_create_params.py b/src/together/types/embedding_create_params.py index 11f45431..ed80b3e6 100644 --- a/src/together/types/embedding_create_params.py +++ b/src/together/types/embedding_create_params.py @@ -2,13 +2,14 @@ from __future__ import annotations +from typing import List, Union from typing_extensions import Required, TypedDict __all__ = ["EmbeddingCreateParams"] class EmbeddingCreateParams(TypedDict, total=False): - input: Required[str] + input: Required[Union[str, List[str]]] """A string providing the text for the model to embed.""" model: Required[str] From a7584db12d26cc55833ade61dae8ec29878d5ed1 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:48:54 +0000 Subject: [PATCH 07/10] feat(api): OpenAPI spec update via Stainless API (#8) --- .stats.yml | 2 +- src/together/_base_client.py | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.stats.yml b/.stats.yml index d40eba14..f52da739 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9ca35fd97a4194757393da2b0049a2e9900fd837a69afecfe01568b83796e299.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9bc2a14025495e7ec3f0959f90457cdb215d4fe285151c29dcff03a47796d33a.yml diff --git a/src/together/_base_client.py b/src/together/_base_client.py index 0f9eb67c..3a1eacee 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -955,6 +955,11 @@ def _request( stream: bool, stream_cls: type[_StreamT] | None, ) -> ResponseT | _StreamT: + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + cast_to = self._maybe_override_cast_to(cast_to, options) self._prepare_options(options) @@ -979,7 +984,7 @@ def _request( if retries > 0: return self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -994,7 +999,7 @@ def _request( if retries > 0: return self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -1022,7 +1027,7 @@ def _request( if retries > 0 and self._should_retry(err.response): err.response.close() return self._retry_request( - options, + input_options, cast_to, retries, err.response.headers, @@ -1518,6 +1523,11 @@ async def _request( # execute it earlier while we are in an async context self._platform = await asyncify(get_platform)() + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + cast_to = self._maybe_override_cast_to(cast_to, options) await self._prepare_options(options) @@ -1540,7 +1550,7 @@ async def _request( if retries > 0: return await self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -1555,7 +1565,7 @@ async def _request( if retries > 0: return await self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -1578,7 +1588,7 @@ async def _request( if retries > 0 and self._should_retry(err.response): await err.response.aclose() return await self._retry_request( - options, + input_options, cast_to, retries, err.response.headers, From 
04877a01b5a9dd3988ff8283c665fad4ca0c643a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:51:12 +0000 Subject: [PATCH 08/10] feat(api): OpenAPI spec update via Stainless API (#9) --- .github/workflows/ci.yml | 1 + .stats.yml | 2 +- README.md | 2 +- requirements-dev.lock | 2 +- src/together/_base_client.py | 12 ++++++------ src/together/_compat.py | 6 +++--- src/together/_models.py | 8 ++++++++ src/together/types/chat/chat_completion_usage.py | 1 - 8 files changed, 21 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 257f0561..40293964 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,7 @@ on: pull_request: branches: - main + - next jobs: lint: diff --git a/.stats.yml b/.stats.yml index f52da739..29957ee8 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9bc2a14025495e7ec3f0959f90457cdb215d4fe285151c29dcff03a47796d33a.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-bd614fbf1c23ddda5eba5554c84321d7c0e462f19465660c259d50c242fe8be8.yml diff --git a/README.md b/README.md index 9aba58bb..c993f3e1 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ It is generated with [Stainless](https://www.stainlessapi.com/). ## Documentation -The REST API documentation can be found [on docs.together.ai](https://docs.together.ai). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found on [docs.together.ai](https://docs.together.ai). The full API of this library can be found in [api.md](api.md). 
## Installation diff --git a/requirements-dev.lock b/requirements-dev.lock index 63621c40..78809391 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -49,7 +49,7 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -mypy==1.7.1 +mypy==1.10.1 mypy-extensions==1.0.0 # via mypy nodeenv==1.8.0 diff --git a/src/together/_base_client.py b/src/together/_base_client.py index 3a1eacee..8a36d721 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -879,9 +879,9 @@ def __exit__( def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options def _prepare_request( self, @@ -961,7 +961,7 @@ def _request( input_options = model_copy(options) cast_to = self._maybe_override_cast_to(cast_to, options) - self._prepare_options(options) + options = self._prepare_options(options) retries = self._remaining_retries(remaining_retries, options) request = self._build_request(options) @@ -1442,9 +1442,9 @@ async def __aexit__( async def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options async def _prepare_request( self, @@ -1529,7 +1529,7 @@ async def _request( input_options = model_copy(options) cast_to = self._maybe_override_cast_to(cast_to, options) - await self._prepare_options(options) + options = await self._prepare_options(options) retries = self._remaining_retries(remaining_retries, options) request = self._build_request(options) diff --git a/src/together/_compat.py b/src/together/_compat.py index 74c7639b..c919b5ad 100644 --- a/src/together/_compat.py +++ b/src/together/_compat.py @@ -118,10 +118,10 @@ def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: return model.__fields__ # type: ignore -def model_copy(model: _ModelT) -> _ModelT: +def model_copy(model: _ModelT, *, deep: bool = False) -> _ModelT: if PYDANTIC_V2: - return model.model_copy() - return model.copy() # type: ignore + return model.model_copy(deep=deep) + return model.copy(deep=deep) # type: ignore def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: diff --git a/src/together/_models.py b/src/together/_models.py index 5d95bb4b..eb7ce3bd 100644 --- a/src/together/_models.py +++ b/src/together/_models.py @@ -643,6 +643,14 @@ def validate_type(*, type_: type[_T], value: object) -> _T: return cast(_T, _validate_non_model_type(type_=type_, value=value)) +def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None: + """Add a pydantic config for the given type. + + Note: this is a no-op on Pydantic v1. + """ + setattr(typ, "__pydantic_config__", config) # noqa: B010 + + # our use of subclasssing here causes weirdness for type checkers, # so we just pretend that we don't subclass if TYPE_CHECKING: diff --git a/src/together/types/chat/chat_completion_usage.py b/src/together/types/chat/chat_completion_usage.py index 5e804ab5..82b9d450 100644 --- a/src/together/types/chat/chat_completion_usage.py +++ b/src/together/types/chat/chat_completion_usage.py @@ -1,7 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from ..._models import BaseModel __all__ = ["ChatCompletionUsage"] From af93a5c78aaf2b9bf7f3c42f7ff19e06472ae5de Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:54:44 +0000 Subject: [PATCH 09/10] feat(api): OpenAPI spec update via Stainless API (#10) --- .stats.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 29957ee8..229d59da 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-bd614fbf1c23ddda5eba5554c84321d7c0e462f19465660c259d50c242fe8be8.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-aa19594b663913393bdbc1b56903615e4eb84c6ebc60617ab2f451ede8a730c2.yml From b3b092c2a0cb4740cd7ec53804bcdaa19d006858 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:55:08 +0000 Subject: [PATCH 10/10] release: 0.1.0-alpha.1 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 71 +++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/together/_version.py | 2 +- 4 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 CHANGELOG.md diff --git a/.release-please-manifest.json b/.release-please-manifest.json index c4762802..ba6c3483 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.0.1-alpha.0" + ".": "0.1.0-alpha.1" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..7b8f8381 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,71 @@ +# Changelog + +## 0.1.0-alpha.1 (2024-07-16) + +Full Changelog: [v0.0.1-alpha.0...v0.1.0-alpha.1](https://github.com/togethercomputer/together-py/compare/v0.0.1-alpha.0...v0.1.0-alpha.1) + +### Features + +* **api:** Config update for pstern-sl/dev ([0a841c4](https://github.com/togethercomputer/together-py/commit/0a841c447d833ee2a6008db4b2ddd4b75eb47fbd)) +* **api:** manual updates ([d43927b](https://github.com/togethercomputer/together-py/commit/d43927b37622bb7d233a178eceb21b2223bba1bc)) +* **api:** manual updates ([94cfef7](https://github.com/togethercomputer/together-py/commit/94cfef7ff7d499fc2e8dd7b1ad4fed9e908cd28a)) +* **api:** manual updates ([#6](https://github.com/togethercomputer/together-py/issues/6)) ([a25a797](https://github.com/togethercomputer/together-py/commit/a25a797f7f7d473ff3f2a939179e6576ec02f891)) +* **api:** OpenAPI spec update via Stainless API ([a78681d](https://github.com/togethercomputer/together-py/commit/a78681d3a8ea469844936ac4793f0a374a4d1af1)) +* **api:** OpenAPI spec update via Stainless API ([9d54568](https://github.com/togethercomputer/together-py/commit/9d54568072bbaef6b99bd0fbc54c451144f2e1f5)) +* **api:** OpenAPI spec update via Stainless API ([00c8693](https://github.com/togethercomputer/together-py/commit/00c86934ed1ab85f0ed1cbc5ecb069d94366b2cd)) +* **api:** OpenAPI spec update via Stainless API ([8609a6e](https://github.com/togethercomputer/together-py/commit/8609a6e8d13b50bf22ec67d0149c9ab51f5dea0e)) +* **api:** OpenAPI spec update via Stainless API ([3dc55d1](https://github.com/togethercomputer/together-py/commit/3dc55d1f4cd41e5a4054bd2a43a5595373db150c)) +* **api:** OpenAPI spec update via Stainless API 
([add76c7](https://github.com/togethercomputer/together-py/commit/add76c7c0ef977dadc3b23f54c784a7f62b81528)) +* **api:** OpenAPI spec update via Stainless API ([5eaa129](https://github.com/togethercomputer/together-py/commit/5eaa1290359411361b99008695d2c786507d2073)) +* **api:** OpenAPI spec update via Stainless API ([d229eef](https://github.com/togethercomputer/together-py/commit/d229eeffe4022374b4d2fd9df208afe4c0fd21bb)) +* **api:** OpenAPI spec update via Stainless API ([643f5cf](https://github.com/togethercomputer/together-py/commit/643f5cfc1d6c3d4d1c77e2c6f27411c5df0845df)) +* **api:** OpenAPI spec update via Stainless API ([9ae4e1b](https://github.com/togethercomputer/together-py/commit/9ae4e1bf74193c6cc8d1509f3b05d816e5e071b4)) +* **api:** OpenAPI spec update via Stainless API ([#10](https://github.com/togethercomputer/together-py/issues/10)) ([af93a5c](https://github.com/togethercomputer/together-py/commit/af93a5c78aaf2b9bf7f3c42f7ff19e06472ae5de)) +* **api:** OpenAPI spec update via Stainless API ([#3](https://github.com/togethercomputer/together-py/issues/3)) ([cd703fb](https://github.com/togethercomputer/together-py/commit/cd703fbdb178f4f05ffc43af0e86f5218537ce5c)) +* **api:** OpenAPI spec update via Stainless API ([#4](https://github.com/togethercomputer/together-py/issues/4)) ([00ef6cc](https://github.com/togethercomputer/together-py/commit/00ef6cc33f844ef3d214e805f3bdfa28240905b7)) +* **api:** OpenAPI spec update via Stainless API ([#5](https://github.com/togethercomputer/together-py/issues/5)) ([3e9827b](https://github.com/togethercomputer/together-py/commit/3e9827b08f2698029e31df3d770d7f873b9d610d)) +* **api:** OpenAPI spec update via Stainless API ([#7](https://github.com/togethercomputer/together-py/issues/7)) ([6bab9da](https://github.com/togethercomputer/together-py/commit/6bab9dadd17cacd94565c8f4df25c0ea6f83e987)) +* **api:** OpenAPI spec update via Stainless API ([#8](https://github.com/togethercomputer/together-py/issues/8)) ([a7584db](https://github.com/togethercomputer/together-py/commit/a7584db12d26cc55833ade61dae8ec29878d5ed1)) +* **api:** OpenAPI spec update via Stainless API ([#9](https://github.com/togethercomputer/together-py/issues/9)) ([04877a0](https://github.com/togethercomputer/together-py/commit/04877a01b5a9dd3988ff8283c665fad4ca0c643a)) +* **api:** rename api key ([b7b55e6](https://github.com/togethercomputer/together-py/commit/b7b55e632590fbe2425be79f332352ba8367e365)) +* **api:** update via SDK Studio ([5866250](https://github.com/togethercomputer/together-py/commit/58662506963afd2ed777fa3efa9f35263689437c)) +* **api:** update via SDK Studio ([27bbc3c](https://github.com/togethercomputer/together-py/commit/27bbc3c53d9e8849d7e7099bee417ef99260eece)) +* **api:** update via SDK Studio ([f7c11ec](https://github.com/togethercomputer/together-py/commit/f7c11ecec9f83889385b710e8270f9159f013bb1)) +* **api:** update via SDK Studio ([22a5f1f](https://github.com/togethercomputer/together-py/commit/22a5f1f01c5dea75a28763bcb991e5276ed9efa4)) +* **api:** update via SDK Studio ([159534b](https://github.com/togethercomputer/together-py/commit/159534b4efeabd8f445037f38af6acd4342c7e7f)) +* **api:** update via SDK Studio ([30663ec](https://github.com/togethercomputer/together-py/commit/30663ec91f215ba7135dd8723e2876cf1bf70dde)) +* **api:** update via SDK Studio ([6561269](https://github.com/togethercomputer/together-py/commit/6561269416ba964bc0b2d452474017cd8036d666)) +* **api:** update via SDK Studio 
([72bad68](https://github.com/togethercomputer/together-py/commit/72bad68007c5e595fa65bcff9e268aca93cb0bef)) +* **api:** update via SDK Studio ([59cce01](https://github.com/togethercomputer/together-py/commit/59cce011f234371b089e375cca57f9984ead2a8e)) +* **api:** update via SDK Studio ([b2b0177](https://github.com/togethercomputer/together-py/commit/b2b017748247196d975cdbc51c4fe5bea23b5bbf)) +* **api:** update via SDK Studio ([331cc46](https://github.com/togethercomputer/together-py/commit/331cc4626448b1e5546ae11c4bd0b90f106094c6)) +* **api:** update via SDK Studio ([6a57974](https://github.com/togethercomputer/together-py/commit/6a57974a5ae311f3f0faa917191964c09579c7bd)) +* **api:** update via SDK Studio ([80c35ee](https://github.com/togethercomputer/together-py/commit/80c35ee69b20f6a9b78512be0344e71e0850bb29)) +* **api:** update via SDK Studio ([668c023](https://github.com/togethercomputer/together-py/commit/668c02366615c5b073b29b03e45ae17ffe668bca)) +* **api:** update via SDK Studio ([a592cff](https://github.com/togethercomputer/together-py/commit/a592cffcc08f9831bdd414168b2e57b45ce42c08)) +* **api:** update via SDK Studio ([733f0b0](https://github.com/togethercomputer/together-py/commit/733f0b0917d8627014c2106a510a4b1322fb8927)) +* **api:** update via SDK Studio ([5095404](https://github.com/togethercomputer/together-py/commit/50954043bcc19bad0ffc23207e8074fcc83a6212)) +* **api:** update via SDK Studio ([d3b6a64](https://github.com/togethercomputer/together-py/commit/d3b6a6403251badab836ff9a75d060afb97440cb)) +* **api:** update via SDK Studio ([adf918b](https://github.com/togethercomputer/together-py/commit/adf918b5c13d36d086d42847a249df124cda119b)) +* **api:** update via SDK Studio ([a79da8e](https://github.com/togethercomputer/together-py/commit/a79da8ea98ed471fc23af36c30696fb910cc6657)) +* **api:** update via SDK Studio ([44b426f](https://github.com/togethercomputer/together-py/commit/44b426fca286acecfbe37b1cef802f40ba73496e)) +* **api:** update via SDK Studio ([1f7c7fe](https://github.com/togethercomputer/together-py/commit/1f7c7fe55e6c728c97df57147f5ae9c072f76e3b)) +* **api:** update via SDK Studio ([500e41b](https://github.com/togethercomputer/together-py/commit/500e41b1eb4c960d5e14fe069251ef887f0e4976)) +* **api:** update via SDK Studio ([ca665ed](https://github.com/togethercomputer/together-py/commit/ca665edb80300b97e269976e3f966308afc50e4a)) +* **api:** updates ([3591c56](https://github.com/togethercomputer/together-py/commit/3591c56336cd5a7cd98c23feed5ae5fc737bcafb)) +* update via SDK Studio ([c56e7d1](https://github.com/togethercomputer/together-py/commit/c56e7d1b19533d687c1dd23d35118546699be8b7)) +* update via SDK Studio ([90adf12](https://github.com/togethercomputer/together-py/commit/90adf128d816a262f51c4dcc4a39b6693c7c746f)) +* update via SDK Studio ([b75aa7f](https://github.com/togethercomputer/together-py/commit/b75aa7f8c46573e6047abc7f9bd03bcc6d90cfe7)) +* update via SDK Studio ([48c9e19](https://github.com/togethercomputer/together-py/commit/48c9e1941baade2916cd4bf56becc42e35052d3a)) +* update via SDK Studio ([592853d](https://github.com/togethercomputer/together-py/commit/592853d727033ea9421ed58576ae15325aca535f)) +* update via SDK Studio ([611badd](https://github.com/togethercomputer/together-py/commit/611baddd1f735c4287e052798812a23f61213717)) +* update via SDK Studio ([a84defc](https://github.com/togethercomputer/together-py/commit/a84defc9ab5274d5eafc9190055083322b8fb93f)) +* update via SDK Studio 
([3c83f12](https://github.com/togethercomputer/together-py/commit/3c83f120ee2b10c4ec2c0e359eaf9f1968f85dcb)) +* update via SDK Studio ([67d01b0](https://github.com/togethercomputer/together-py/commit/67d01b03b05ee598539b68d70185192862fb0a29)) +* update via SDK Studio ([065b990](https://github.com/togethercomputer/together-py/commit/065b9903a0c0e9eb67a591d51abbb27e08020ef5)) + + +### Chores + +* go live ([#1](https://github.com/togethercomputer/together-py/issues/1)) ([9c9e672](https://github.com/togethercomputer/together-py/commit/9c9e67276776b7169bd2e9066c6049f5237ed044)) +* update SDK settings ([e082ad6](https://github.com/togethercomputer/together-py/commit/e082ad6d7beff79ae5301f63d7b334aeebc12024)) diff --git a/pyproject.toml b/pyproject.toml index 3d54bfb5..d771f67d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "together" -version = "0.0.1-alpha.0" +version = "0.1.0-alpha.1" description = "The official Python library for the together API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/together/_version.py b/src/together/_version.py index e4f5dac8..2b114bd1 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "together" -__version__ = "0.0.1-alpha.0" # x-release-please-version +__version__ = "0.1.0-alpha.1" # x-release-please-version