From 9c9e67276776b7169bd2e9066c6049f5237ed044 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 13 Jun 2024 23:19:50 +0000 Subject: [PATCH 01/10] chore: go live (#1) --- .github/workflows/publish-pypi.yml | 31 +++++++++++++ .github/workflows/release-doctor.yml | 19 ++++++++ .release-please-manifest.json | 3 ++ CONTRIBUTING.md | 4 +- README.md | 13 +++--- bin/check-release-environment | 32 ++++++++++++++ pyproject.toml | 6 +-- release-please-config.json | 66 ++++++++++++++++++++++++++++ src/together/_version.py | 2 +- 9 files changed, 162 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/publish-pypi.yml create mode 100644 .github/workflows/release-doctor.yml create mode 100644 .release-please-manifest.json create mode 100644 bin/check-release-environment create mode 100644 release-please-config.json diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 00000000..632c0e94 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,31 @@ +# This workflow is triggered when a GitHub release is created. +# It can also be run manually to re-publish to PyPI in case it failed for some reason. +# You can run this workflow by navigating to https://www.github.com/togethercomputer/together-py/actions/workflows/publish-pypi.yml +name: Publish PyPI +on: + workflow_dispatch: + + release: + types: [published] + +jobs: + publish: + name: publish + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: 0.24.0 + RYE_INSTALL_OPTION: "--yes" + + - name: Publish to PyPI + run: | + bash ./bin/publish-pypi + env: + PYPI_TOKEN: ${{ secrets.TOGETHER_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml new file mode 100644 index 00000000..5e4a97e0 --- /dev/null +++ b/.github/workflows/release-doctor.yml @@ -0,0 +1,19 @@ +name: Release Doctor +on: + pull_request: + workflow_dispatch: + +jobs: + release_doctor: + name: release doctor + runs-on: ubuntu-latest + if: github.repository == 'togethercomputer/together-py' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') + + steps: + - uses: actions/checkout@v4 + + - name: Check release environment + run: | + bash ./bin/check-release-environment + env: + PYPI_TOKEN: ${{ secrets.TOGETHER_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 00000000..c4762802 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "0.0.1-alpha.0" +} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 412eb3a2..36b2db47 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -59,7 +59,7 @@ If you’d like to use the repository from source, you can either install from g To install via git: ```bash -pip install git+ssh://git@github.com/stainless-sdks/TogetherAI-python.git +pip install git+ssh://git@github.com/togethercomputer/together-py.git ``` Alternatively, you can build from source and install the wheel file: @@ -117,7 +117,7 @@ the changes aren't made through the automated pipeline, you may want to make rel ### Publish with a GitHub workflow -You can release to package managers by using [the 
`Publish PyPI` GitHub action](https://www.github.com/stainless-sdks/TogetherAI-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. +You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/togethercomputer/together-py/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. ### Publish manually diff --git a/README.md b/README.md index 5db7d518..e52c2070 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,10 @@ The REST API documentation can be found [on docs.together.ai](https://docs.toget ## Installation ```sh -# install from this staging repo -pip install git+ssh://git@github.com/stainless-sdks/TogetherAI-python.git +# install from PyPI +pip install --pre together ``` -> [!NOTE] -> Once this package is [published to PyPI](https://app.stainlessapi.com/docs/guides/publish), this will become: `pip install --pre together` - ## Usage The full API of this library can be found in [api.md](api.md). @@ -296,9 +293,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.choices) ``` -These methods return an [`APIResponse`](https://github.com/stainless-sdks/TogetherAI-python/tree/main/src/together/_response.py) object. +These methods return an [`APIResponse`](https://github.com/togethercomputer/together-py/tree/main/src/together/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/stainless-sdks/TogetherAI-python/tree/main/src/together/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/togethercomputer/together-py/tree/main/src/together/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` @@ -394,7 +391,7 @@ This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) con We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. -We are keen for your feedback; please open an [issue](https://www.github.com/stainless-sdks/TogetherAI-python/issues) with questions, bugs, or suggestions. +We are keen for your feedback; please open an [issue](https://www.github.com/togethercomputer/together-py/issues) with questions, bugs, or suggestions. ## Requirements diff --git a/bin/check-release-environment b/bin/check-release-environment new file mode 100644 index 00000000..8439dbde --- /dev/null +++ b/bin/check-release-environment @@ -0,0 +1,32 @@ +#!/usr/bin/env bash + +warnings=() +errors=() + +if [ -z "${PYPI_TOKEN}" ]; then + warnings+=("The TOGETHER_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") +fi + +lenWarnings=${#warnings[@]} + +if [[ lenWarnings -gt 0 ]]; then + echo -e "Found the following warnings in the release environment:\n" + + for warning in "${warnings[@]}"; do + echo -e "- $warning\n" + done +fi + +lenErrors=${#errors[@]} + +if [[ lenErrors -gt 0 ]]; then + echo -e "Found the following errors in the release environment:\n" + + for error in "${errors[@]}"; do + echo -e "- $error\n" + done + + exit 1 +fi + +echo "The environment is ready to push releases!" 
diff --git a/pyproject.toml b/pyproject.toml index 1ccfdf6b..b834a03d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,8 +39,8 @@ classifiers = [ [project.urls] -Homepage = "https://github.com/stainless-sdks/TogetherAI-python" -Repository = "https://github.com/stainless-sdks/TogetherAI-python" +Homepage = "https://github.com/togethercomputer/together-py" +Repository = "https://github.com/togethercomputer/together-py" @@ -108,7 +108,7 @@ path = "README.md" [[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] # replace relative links with absolute links pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' -replacement = '[\1](https://github.com/stainless-sdks/TogetherAI-python/tree/main/\g<2>)' +replacement = '[\1](https://github.com/togethercomputer/together-py/tree/main/\g<2>)' [tool.black] line-length = 120 diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 00000000..9dc714d7 --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,66 @@ +{ + "packages": { + ".": {} + }, + "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json", + "include-v-in-tag": true, + "include-component-in-tag": false, + "versioning": "prerelease", + "prerelease": true, + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "pull-request-header": "Automated Release PR", + "pull-request-title-pattern": "release: ${version}", + "changelog-sections": [ + { + "type": "feat", + "section": "Features" + }, + { + "type": "fix", + "section": "Bug Fixes" + }, + { + "type": "perf", + "section": "Performance Improvements" + }, + { + "type": "revert", + "section": "Reverts" + }, + { + "type": "chore", + "section": "Chores" + }, + { + "type": "docs", + "section": "Documentation" + }, + { + "type": "style", + "section": "Styles" + }, + { + "type": "refactor", + "section": "Refactors" + }, + { + "type": "test", + "section": "Tests", + "hidden": true + }, + { + "type": "build", + "section": "Build System" + }, + { + "type": "ci", + "section": "Continuous Integration", + "hidden": true + } + ], + "release-type": "python", + "extra-files": [ + "src/together/_version.py" + ] +} \ No newline at end of file diff --git a/src/together/_version.py b/src/together/_version.py index e8ad9cc7..e4f5dac8 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "together" -__version__ = "0.0.1-alpha.0" +__version__ = "0.0.1-alpha.0" # x-release-please-version From cd703fbdb178f4f05ffc43af0e86f5218537ce5c Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 23:26:13 +0000 Subject: [PATCH 02/10] feat(api): OpenAPI spec update via Stainless API (#3) --- .stats.yml | 2 +- src/together/_client.py | 4 +- src/together/resources/chat/completions.py | 46 +++++++++++-------- src/together/resources/completions.py | 28 +++++------ src/together/resources/embeddings.py | 4 +- src/together/resources/files.py | 16 +++---- src/together/resources/fine_tune.py | 24 +++++----- src/together/resources/images.py | 4 +- src/together/resources/models.py | 4 +- .../types/chat/completion_create_params.py | 8 ++-- .../types/completion_create_params.py | 2 +- tests/test_client.py | 4 +- 12 files changed, 77 insertions(+), 69 deletions(-) diff --git a/.stats.yml b/.stats.yml index d332e906..02655e1f 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-6e975518a2563fdb57394133f1ed9dfe426a2cf5d2fef793fd139627c93df4aa.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-5934359dd4fbab352cb5042ffbf08374bd3d3b6bc0550fd09797de44626772fe.yml diff --git a/src/together/_client.py b/src/together/_client.py index 9e4fd0c3..77e8d83a 100644 --- a/src/together/_client.py +++ b/src/together/_client.py @@ -131,7 +131,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": f"Bearer {api_key}"} + return {"Authorization": api_key} @property @override @@ -313,7 +313,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": f"Bearer {api_key}"} + return {"Authorization": api_key} @property @override diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index d45eac28..43125d39 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -50,7 +50,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -74,7 +74,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -114,7 +114,8 @@ def create( stop: A list of string sequences that will truncate (stop) inference text output. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. temperature: Determines the degree of randomness in the response. 
@@ -147,7 +148,7 @@ def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -170,7 +171,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Stream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -178,7 +179,8 @@ def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -243,7 +245,7 @@ def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -266,7 +268,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -274,7 +276,8 @@ def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -338,7 +341,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -414,7 +417,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -438,7 +441,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -478,7 +481,8 @@ async def create( stop: A list of string sequences that will truncate (stop) inference text output. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. temperature: Determines the degree of randomness in the response. 
@@ -511,7 +515,7 @@ async def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -534,7 +538,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncStream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -542,7 +546,8 @@ async def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -607,7 +612,7 @@ async def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -630,7 +635,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ - Creates a model response for the given chat conversation. + Query a chat model. Args: messages: A list of messages comprising the conversation so far. @@ -638,7 +643,8 @@ async def create( model: The name of the model to query. stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. echo: If set, the response will contain the prompt, and will also return prompt logprobs if set with logprobs. @@ -702,7 +708,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index fb72ecb8..163fc7a5 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -48,7 +48,7 @@ def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -69,7 +69,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. 
@@ -136,7 +136,7 @@ def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -156,7 +156,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Stream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -223,7 +223,7 @@ def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -243,7 +243,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | Stream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -309,7 +309,7 @@ def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -379,7 +379,7 @@ async def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -400,7 +400,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -467,7 +467,7 @@ async def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -487,7 +487,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncStream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. Args: model: The name of the model to query. @@ -554,7 +554,7 @@ async def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, @@ -574,7 +574,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | AsyncStream[Completion]: """ - Creates a completion for the provided prompt and parameters + Query a language, code, or image model. 
Args: model: The name of the model to query. @@ -640,7 +640,7 @@ async def create( prompt: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, object] | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, min_p: float | NotGiven = NOT_GIVEN, diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py index 75ccf4bd..290c574d 100644 --- a/src/together/resources/embeddings.py +++ b/src/together/resources/embeddings.py @@ -48,7 +48,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Embedding: """ - Creates an embedding vector representing the input text + Query an embedding model for a given string of text. Args: input: A string providing the text for the model to embed. @@ -101,7 +101,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Embedding: """ - Creates an embedding vector representing the input text + Query an embedding model for a given string of text. Args: input: A string providing the text for the model to embed. diff --git a/src/together/resources/files.py b/src/together/resources/files.py index ee055f07..90b95bf1 100644 --- a/src/together/resources/files.py +++ b/src/together/resources/files.py @@ -52,7 +52,7 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileRetrieveResponse: """ - Retrieve a file + List the metadata for a single uploaded data file. Args: extra_headers: Send extra headers @@ -83,7 +83,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileListResponse: - """List all files""" + """List the metadata for all uploaded data files.""" return self._get( "/files", options=make_request_options( @@ -104,7 +104,7 @@ def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileDeleteResponse: """ - Delete a file + Delete a previously uploaded data file. Args: extra_headers: Send extra headers @@ -137,7 +137,7 @@ def content( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BinaryAPIResponse: """ - Retrieve file content + Get the contents of a single uploaded data file. Args: extra_headers: Send extra headers @@ -181,7 +181,7 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileRetrieveResponse: """ - Retrieve a file + List the metadata for a single uploaded data file. Args: extra_headers: Send extra headers @@ -212,7 +212,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileListResponse: - """List all files""" + """List the metadata for all uploaded data files.""" return await self._get( "/files", options=make_request_options( @@ -233,7 +233,7 @@ async def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileDeleteResponse: """ - Delete a file + Delete a previously uploaded data file. Args: extra_headers: Send extra headers @@ -266,7 +266,7 @@ async def content( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncBinaryAPIResponse: """ - Retrieve file content + Get the contents of a single uploaded data file. 
Args: extra_headers: Send extra headers diff --git a/src/together/resources/fine_tune.py b/src/together/resources/fine_tune.py index e3aab276..c75ac6ae 100644 --- a/src/together/resources/fine_tune.py +++ b/src/together/resources/fine_tune.py @@ -57,7 +57,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Create a fine-tuning job + Use a model to create a fine-tuning job. Args: model: Name of the base model to run fine-tune job on @@ -117,7 +117,7 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Retrieve fine-tune job details + List the metadata for a single fine-tuning job. Args: extra_headers: Send extra headers @@ -148,7 +148,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneListResponse: - """List fine-tune job history""" + """List the metadata for all fine-tuning jobs.""" return self._get( "/fine-tunes", options=make_request_options( @@ -169,7 +169,7 @@ def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Cancels a running fine-tuning job. + Cancel a currently running fine-tuning job. Args: extra_headers: Send extra headers @@ -204,7 +204,7 @@ def download( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneDownloadResponse: """ - Downloads a compressed fine-tuned model or checkpoint to local disk. + Download a compressed fine-tuned model or checkpoint to local disk. Args: ft_id: Fine-tune ID to download. A string that starts with `ft-`. @@ -253,7 +253,7 @@ def list_events( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneEvent: """ - List events of a fine-tune job + List the events for a single fine-tuning job. Args: extra_headers: Send extra headers @@ -303,7 +303,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Create a fine-tuning job + Use a model to create a fine-tuning job. Args: model: Name of the base model to run fine-tune job on @@ -363,7 +363,7 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Retrieve fine-tune job details + List the metadata for a single fine-tuning job. Args: extra_headers: Send extra headers @@ -394,7 +394,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneListResponse: - """List fine-tune job history""" + """List the metadata for all fine-tuning jobs.""" return await self._get( "/fine-tunes", options=make_request_options( @@ -415,7 +415,7 @@ async def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTune: """ - Cancels a running fine-tuning job. + Cancel a currently running fine-tuning job. Args: extra_headers: Send extra headers @@ -450,7 +450,7 @@ async def download( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneDownloadResponse: """ - Downloads a compressed fine-tuned model or checkpoint to local disk. + Download a compressed fine-tuned model or checkpoint to local disk. Args: ft_id: Fine-tune ID to download. A string that starts with `ft-`. @@ -499,7 +499,7 @@ async def list_events( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuneEvent: """ - List events of a fine-tune job + List the events for a single fine-tuning job. 
Args: extra_headers: Send extra headers diff --git a/src/together/resources/images.py b/src/together/resources/images.py index d57d1240..ec9156c6 100644 --- a/src/together/resources/images.py +++ b/src/together/resources/images.py @@ -54,7 +54,7 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImageFile: """ - Generate images based on a given prompt using a specified model + Use an image model to generate an image for a given prompt. Args: model: The model to use for image generation. @@ -131,7 +131,7 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ImageFile: """ - Generate images based on a given prompt using a specified model + Use an image model to generate an image for a given prompt. Args: model: The model to use for image generation. diff --git a/src/together/resources/models.py b/src/together/resources/models.py index edb727fc..da92d7b9 100644 --- a/src/together/resources/models.py +++ b/src/together/resources/models.py @@ -40,7 +40,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: - """Lists all the available models""" + """Lists all of Together's open-source models""" return self._get( "/models", options=make_request_options( @@ -69,7 +69,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: - """Lists all the available models""" + """Lists all of Together's open-source models""" return await self._get( "/models", options=make_request_options( diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index d1e404b9..05c6a86a 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -38,7 +38,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): mentioned prior. """ - logit_bias: Dict[str, object] + logit_bias: Dict[str, float] """ The `logit_bias` parameter allows us to adjust the likelihood of specific tokens appearing in the generated output. @@ -128,7 +128,8 @@ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): stream: Literal[False] """If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. """ @@ -136,7 +137,8 @@ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] """If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + Stream terminates with `data: [DONE]`. If false, return a single JSON object + containing the results. """ diff --git a/src/together/types/completion_create_params.py b/src/together/types/completion_create_params.py index f2cb084e..7f4e1fef 100644 --- a/src/together/types/completion_create_params.py +++ b/src/together/types/completion_create_params.py @@ -28,7 +28,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): mentioned prior. """ - logit_bias: Dict[str, object] + logit_bias: Dict[str, float] """ The `logit_bias` parameter allows us to adjust the likelihood of specific tokens appearing in the generated output. 
diff --git a/tests/test_client.py b/tests/test_client.py index b09a8db0..2e3679a9 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -332,7 +332,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = Together(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert request.headers.get("Authorization") == f"Bearer {api_key}" + assert request.headers.get("Authorization") == api_key with pytest.raises(TogetherError): client2 = Together(base_url=base_url, api_key=None, _strict_response_validation=True) @@ -1048,7 +1048,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = AsyncTogether(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert request.headers.get("Authorization") == f"Bearer {api_key}" + assert request.headers.get("Authorization") == api_key with pytest.raises(TogetherError): client2 = AsyncTogether(base_url=base_url, api_key=None, _strict_response_validation=True) From 00ef6cc33f844ef3d214e805f3bdfa28240905b7 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:28:55 +0000 Subject: [PATCH 03/10] feat(api): OpenAPI spec update via Stainless API (#4) --- .stats.yml | 2 +- src/together/_base_client.py | 25 +- src/together/_utils/__init__.py | 1 + src/together/_utils/_reflection.py | 8 + src/together/_utils/_sync.py | 19 +- src/together/resources/chat/completions.py | 390 +++++++++++------- src/together/resources/completions.py | 338 +++++++++------ .../types/chat/completion_create_params.py | 92 +++-- .../types/completion_create_params.py | 76 ++-- 9 files changed, 585 insertions(+), 366 deletions(-) create mode 100644 src/together/_utils/_reflection.py diff --git a/.stats.yml b/.stats.yml index 02655e1f..d6da9ca3 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-5934359dd4fbab352cb5042ffbf08374bd3d3b6bc0550fd09797de44626772fe.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-33661dd8fd4c26ecd595dee22e2c9274e6c4699ad8de5ece233e0d37376c6b7c.yml diff --git a/src/together/_base_client.py b/src/together/_base_client.py index 66db09be..a806809b 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -60,7 +60,7 @@ RequestOptions, ModelBuilderProtocol, ) -from ._utils import is_dict, is_list, is_given, lru_cache, is_mapping +from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping from ._compat import model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( @@ -358,6 +358,7 @@ def __init__( self._custom_query = custom_query or {} self._strict_response_validation = _strict_response_validation self._idempotency_header = None + self._platform: Platform | None = None if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] raise TypeError( @@ -456,7 +457,7 @@ def _build_request( raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") headers = self._build_headers(options) - params = _merge_mappings(self._custom_query, options.params) + params 
= _merge_mappings(self.default_query, options.params) content_type = headers.get("Content-Type") # If the given Content-Type header is multipart/form-data then it @@ -592,6 +593,12 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @property + def default_query(self) -> dict[str, object]: + return { + **self._custom_query, + } + def _validate_headers( self, headers: Headers, # noqa: ARG002 @@ -616,7 +623,10 @@ def base_url(self, url: URL | str) -> None: self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url)) def platform_headers(self) -> Dict[str, str]: - return platform_headers(self._version) + # the actual implementation is in a separate `lru_cache` decorated + # function because adding `lru_cache` to methods will leak memory + # https://github.com/python/cpython/issues/88476 + return platform_headers(self._version, platform=self._platform) def _parse_retry_after_header(self, response_headers: Optional[httpx.Headers] = None) -> float | None: """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified. @@ -1492,6 +1502,11 @@ async def _request( stream_cls: type[_AsyncStreamT] | None, remaining_retries: int | None, ) -> ResponseT | _AsyncStreamT: + if self._platform is None: + # `get_platform` can make blocking IO calls so we + # execute it earlier while we are in an async context + self._platform = await asyncify(get_platform)() + cast_to = self._maybe_override_cast_to(cast_to, options) await self._prepare_options(options) @@ -1915,11 +1930,11 @@ def get_platform() -> Platform: @lru_cache(maxsize=None) -def platform_headers(version: str) -> Dict[str, str]: +def platform_headers(version: str, *, platform: Platform | None) -> Dict[str, str]: return { "X-Stainless-Lang": "python", "X-Stainless-Package-Version": version, - "X-Stainless-OS": str(get_platform()), + "X-Stainless-OS": str(platform or get_platform()), "X-Stainless-Arch": str(get_architecture()), "X-Stainless-Runtime": get_python_runtime(), "X-Stainless-Runtime-Version": get_python_version(), diff --git a/src/together/_utils/__init__.py b/src/together/_utils/__init__.py index 31b5b227..667e2473 100644 --- a/src/together/_utils/__init__.py +++ b/src/together/_utils/__init__.py @@ -49,3 +49,4 @@ maybe_transform as maybe_transform, async_maybe_transform as async_maybe_transform, ) +from ._reflection import function_has_argument as function_has_argument diff --git a/src/together/_utils/_reflection.py b/src/together/_utils/_reflection.py new file mode 100644 index 00000000..e134f58e --- /dev/null +++ b/src/together/_utils/_reflection.py @@ -0,0 +1,8 @@ +import inspect +from typing import Any, Callable + + +def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool: + """Returns whether or not the given function has a specific parameter""" + sig = inspect.signature(func) + return arg_name in sig.parameters diff --git a/src/together/_utils/_sync.py b/src/together/_utils/_sync.py index 595924e5..d0d81033 100644 --- a/src/together/_utils/_sync.py +++ b/src/together/_utils/_sync.py @@ -7,6 +7,8 @@ import anyio import anyio.to_thread +from ._reflection import function_has_argument + T_Retval = TypeVar("T_Retval") T_ParamSpec = ParamSpec("T_ParamSpec") @@ -59,6 +61,21 @@ def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: partial_f = functools.partial(function, *args, **kwargs) - return await anyio.to_thread.run_sync(partial_f, 
cancellable=cancellable, limiter=limiter) + + # In `v4.1.0` anyio added the `abandon_on_cancel` argument and deprecated the old + # `cancellable` argument, so we need to use the new `abandon_on_cancel` to avoid + # surfacing deprecation warnings. + if function_has_argument(anyio.to_thread.run_sync, "abandon_on_cancel"): + return await anyio.to_thread.run_sync( + partial_f, + abandon_on_cancel=cancellable, + limiter=limiter, + ) + + return await anyio.to_thread.run_sync( + partial_f, + cancellable=cancellable, + limiter=limiter, + ) return wrapper diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index 43125d39..0d69a7c8 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -81,53 +81,66 @@ def create( model: The name of the model to query. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. 
+ stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -178,53 +191,66 @@ def create( model: The name of the model to query. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. 
max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -275,53 +301,66 @@ def create( model: The name of the model to query. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. 
+ stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. 
+ top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -448,53 +487,66 @@ async def create( model: The name of the model to query. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. 
If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -545,53 +597,66 @@ async def create( model: The name of the model to query. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. 
logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -642,53 +707,66 @@ async def create( model: The name of the model to query. 
- stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - response_format: Specifies the format of the response. + response_format: An object specifying the format that the model must output. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - tool_choice: The choice of tool to use. + tool_choice: Controls which (if any) function is called by the model. By default uses `auto`, + which lets the model pick between generating a message or calling a function. - tools: A list of tools to be used in the query. + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. 
Use this to provide a list of functions the model may generate JSON inputs + for. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index 163fc7a5..3f777147 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -76,46 +76,57 @@ def create( prompt: A string providing context for the model to complete. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. 
+ stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -163,46 +174,57 @@ def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. 
+ min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -250,46 +272,57 @@ def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. 
+ frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -407,46 +440,57 @@ async def create( prompt: A string providing context for the model to complete. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. 
- frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. 
+ top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -494,46 +538,57 @@ async def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). - stop: A list of string sequences that will truncate (stop) inference text output. + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. - temperature: Determines the degree of randomness in the response. + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. 
- top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers @@ -581,46 +636,57 @@ async def create( prompt: A string providing context for the model to complete. - stream: If set, tokens are returned as Server-Sent Events as they are made available. - Stream terminates with `data: [DONE]` + stream: If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + echo: If true, the response will contain the prompt. Can be used with `logprobs` to + return prompt logprobs. - frequency_penalty: The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. - logit_bias: The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. + logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. max_tokens: The maximum number of tokens to generate. - min_p: The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. + min_p: A number between 0 and 1 that can be used as an alternative to temperature. - n: Number of generations to return + n: The number of completions to generate for each prompt. - presence_penalty: The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. repetition_penalty: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - safety_model: The name of the safety model to use. - - stop: A list of string sequences that will truncate (stop) inference text output. - - temperature: Determines the degree of randomness in the response. 
- - top_k: The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. - - top_p: The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + safety_model: The name of the moderation model used to validate tokens. Choose from the + available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). + + stop: A list of string sequences that will truncate (stop) inference text output. For + example, "" will stop generation as soon as the model generates the given + token. + + temperature: A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. + + top_k: An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. + + top_p: A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. extra_headers: Send extra headers diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index 05c6a86a..8b57ef5b 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -26,46 +26,39 @@ class CompletionCreateParamsBase(TypedDict, total=False): """The name of the model to query.""" echo: bool - """ - If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + """If true, the response will contain the prompt. + + Can be used with `logprobs` to return prompt logprobs. """ frequency_penalty: float """ - The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. """ logit_bias: Dict[str, float] - """ - The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. - """ + """Adjusts the likelihood of specific tokens appearing in the generated output.""" logprobs: int """ Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. """ max_tokens: int """The maximum number of tokens to generate.""" min_p: float - """ - The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. 
- """ + """A number between 0 and 1 that can be used as an alternative to temperature.""" n: int - """Number of generations to return""" + """The number of completions to generate for each prompt.""" presence_penalty: float """ - The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. """ repetition_penalty: float @@ -75,33 +68,60 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ response_format: ResponseFormat - """Specifies the format of the response.""" + """An object specifying the format that the model must output.""" safety_model: str - """The name of the safety model to use.""" + """The name of the moderation model used to validate tokens. + + Choose from the available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). + """ stop: List[str] - """A list of string sequences that will truncate (stop) inference text output.""" + """A list of string sequences that will truncate (stop) inference text output. + + For example, "" will stop generation as soon as the model generates the + given token. + """ temperature: float - """Determines the degree of randomness in the response.""" + """ + A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. + """ tool_choice: ToolChoice - """The choice of tool to use.""" + """Controls which (if any) function is called by the model. + + By default uses `auto`, which lets the model pick between generating a message + or calling a function. + """ tools: Iterable[ToolsParam] - """A list of tools to be used in the query.""" + """A list of tools the model may call. + + Currently, only functions are supported as a tool. Use this to provide a list of + functions the model may generate JSON inputs for. + """ top_k: int """ - The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. """ top_p: float """ - The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. """ @@ -126,19 +146,19 @@ class ResponseFormat(TypedDict, total=False): class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): stream: Literal[False] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. 
+ """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]`. If false, return a single JSON object - containing the results. + """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. """ diff --git a/src/together/types/completion_create_params.py b/src/together/types/completion_create_params.py index 7f4e1fef..050a5477 100644 --- a/src/together/types/completion_create_params.py +++ b/src/together/types/completion_create_params.py @@ -16,46 +16,39 @@ class CompletionCreateParamsBase(TypedDict, total=False): """A string providing context for the model to complete.""" echo: bool - """ - If set, the response will contain the prompt, and will also return prompt - logprobs if set with logprobs. + """If true, the response will contain the prompt. + + Can be used with `logprobs` to return prompt logprobs. """ frequency_penalty: float """ - The `frequency_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will decrease the likelihood of repeating tokens that were - mentioned prior. + A number between -2.0 and 2.0 where a positive value decreases the likelihood of + repeating tokens that have already been mentioned. """ logit_bias: Dict[str, float] - """ - The `logit_bias` parameter allows us to adjust the likelihood of specific tokens - appearing in the generated output. - """ + """Adjusts the likelihood of specific tokens appearing in the generated output.""" logprobs: int """ Determines the number of most likely tokens to return at each token position log - probabilities to return + probabilities to return. """ max_tokens: int """The maximum number of tokens to generate.""" min_p: float - """ - The `min_p` parameter is a number between 0 and 1 and an alternative to - `temperature`. - """ + """A number between 0 and 1 that can be used as an alternative to temperature.""" n: int - """Number of generations to return""" + """The number of completions to generate for each prompt.""" presence_penalty: float """ - The `presence_penalty` parameter is a number between -2.0 and 2.0 where a - positive value will increase the likelihood of a model talking about new topics. + A number between -2.0 and 2.0 where a positive value increases the likelihood of + a model talking about new topics. """ repetition_penalty: float @@ -65,40 +58,61 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ safety_model: str - """The name of the safety model to use.""" + """The name of the moderation model used to validate tokens. + + Choose from the available moderation models found + [here](https://docs.together.ai/docs/inference-models#moderation-models). + """ stop: List[str] - """A list of string sequences that will truncate (stop) inference text output.""" + """A list of string sequences that will truncate (stop) inference text output. + + For example, "" will stop generation as soon as the model generates the + given token. 
+ """ temperature: float - """Determines the degree of randomness in the response.""" + """ + A decimal number from 0-1 that determines the degree of randomness in the + response. A temperature less than 1 favors more correctness and is appropriate + for question answering or summarization. A value closer to 1 introduces more + randomness in the output. + """ top_k: int """ - The `top_k` parameter is used to limit the number of choices for the next - predicted word or token. + An integer that's used to limit the number of choices for the next predicted + word or token. It specifies the maximum number of tokens to consider at each + step, based on their probability of occurrence. This technique helps to speed up + the generation process and can improve the quality of the generated text by + focusing on the most likely options. """ top_p: float """ - The `top_p` (nucleus) parameter is used to dynamically adjust the number of - choices for each predicted token based on the cumulative probabilities. + A percentage (also called the nucleus parameter) that's used to dynamically + adjust the number of choices for each predicted token based on the cumulative + probabilities. It specifies a probability threshold below which all less likely + tokens are filtered out. This technique helps maintain diversity and generate + more fluent and natural-sounding text. """ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): stream: Literal[False] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]` + """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """If set, tokens are returned as Server-Sent Events as they are made available. - - Stream terminates with `data: [DONE]` + """ + If true, stream tokens as Server-Sent Events as the model generates them instead + of waiting for the full model response. The stream terminates with + `data: [DONE]`. If false, return a single JSON object containing the results. 
""" From 3e9827b08f2698029e31df3d770d7f873b9d610d Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Wed, 26 Jun 2024 17:26:31 +0000 Subject: [PATCH 04/10] feat(api): OpenAPI spec update via Stainless API (#5) --- .stats.yml | 2 +- README.md | 2 +- bin/publish-pypi | 3 ++ src/together/resources/chat/completions.py | 10 ++++++ src/together/types/__init__.py | 1 + src/together/types/chat/chat_completion.py | 33 ++++++++++++++----- .../types/chat/chat_completion_chunk.py | 32 +++++++++++------- .../types/chat/completion_create_params.py | 11 +++++++ src/together/types/completion.py | 2 +- src/together/types/fine_tune_event.py | 3 +- src/together/types/log_probs.py | 10 +++++- src/together/types/tool_choice.py | 23 +++++++++++++ src/together/types/tool_choice_param.py | 14 +++++--- tests/api_resources/chat/test_completions.py | 4 +++ 14 files changed, 121 insertions(+), 29 deletions(-) create mode 100644 src/together/types/tool_choice.py diff --git a/.stats.yml b/.stats.yml index d6da9ca3..cf0d3ce7 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-33661dd8fd4c26ecd595dee22e2c9274e6c4699ad8de5ece233e0d37376c6b7c.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-e8f4e11a2e3927c75dce42c913ef5c9adcf2aef3d3b1312b4825d9f135413c39.yml diff --git a/README.md b/README.md index e52c2070..70154594 100644 --- a/README.md +++ b/README.md @@ -362,7 +362,7 @@ You can directly override the [httpx client](https://www.python-httpx.org/api/#c - Support for proxies - Custom transports -- Additional [advanced](https://www.python-httpx.org/advanced/#client-instances) functionality +- Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python from together import Together, DefaultHttpxClient diff --git a/bin/publish-pypi b/bin/publish-pypi index 826054e9..05bfccbb 100644 --- a/bin/publish-pypi +++ b/bin/publish-pypi @@ -3,4 +3,7 @@ set -eux mkdir -p dist rye build --clean +# Patching importlib-metadata version until upstream library version is updated +# https://github.com/pypa/twine/issues/977#issuecomment-2189800841 +"$HOME/.rye/self/bin/python3" -m pip install 'importlib-metadata==7.2.1' rye publish --yes --token=$PYPI_TOKEN diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index 0d69a7c8..5d698125 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -50,6 +50,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -161,6 +162,7 @@ def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -271,6 +273,7 @@ def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, 
logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -380,6 +383,7 @@ def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -411,6 +415,7 @@ def create( "model": model, "echo": echo, "frequency_penalty": frequency_penalty, + "function_call": function_call, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, @@ -456,6 +461,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -567,6 +573,7 @@ async def create( stream: Literal[True], echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -677,6 +684,7 @@ async def create( stream: bool, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -786,6 +794,7 @@ async def create( model: str, echo: bool | NotGiven = NOT_GIVEN, frequency_penalty: float | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, logprobs: int | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, @@ -817,6 +826,7 @@ async def create( "model": model, "echo": echo, "frequency_penalty": frequency_penalty, + "function_call": function_call, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, diff --git a/src/together/types/__init__.py b/src/together/types/__init__.py index ab0e2f56..cc25b8a0 100644 --- a/src/together/types/__init__.py +++ b/src/together/types/__init__.py @@ -7,6 +7,7 @@ from .log_probs import LogProbs as LogProbs from .completion import Completion as Completion from .image_file import ImageFile as ImageFile +from .tool_choice import ToolChoice as ToolChoice from .tools_param import ToolsParam as ToolsParam from .fine_tune_event import FineTuneEvent as FineTuneEvent from .tool_choice_param import ToolChoiceParam as ToolChoiceParam diff --git a/src/together/types/chat/chat_completion.py b/src/together/types/chat/chat_completion.py index 19fe4a27..ebe6722a 100644 --- a/src/together/types/chat/chat_completion.py +++ b/src/together/types/chat/chat_completion.py @@ -5,34 +5,51 @@ from ..._models import BaseModel from ..log_probs import LogProbs +from ..tool_choice import ToolChoice from .chat_completion_usage import ChatCompletionUsage -__all__ = ["ChatCompletion", "Choice", "ChoiceMessage"] +__all__ = ["ChatCompletion", "Choice", "ChoiceMessage", "ChoiceMessageFunctionCall"] + + +class ChoiceMessageFunctionCall(BaseModel): + arguments: str + + name: str class ChoiceMessage(BaseModel): content: Optional[str] = None - role: 
Optional[str] = None + role: Literal["assistant"] + + function_call: Optional[ChoiceMessageFunctionCall] = None + + tool_calls: Optional[List[ToolChoice]] = None class Choice(BaseModel): - finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls"]] = None + finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None + + index: Optional[int] = None logprobs: Optional[LogProbs] = None message: Optional[ChoiceMessage] = None + seed: Optional[int] = None + + text: Optional[str] = None + class ChatCompletion(BaseModel): - id: Optional[str] = None + id: str - choices: Optional[List[Choice]] = None + choices: List[Choice] - created: Optional[int] = None + created: int - model: Optional[str] = None + model: str - object: Optional[Literal["chat.completion"]] = None + object: Literal["chat.completion"] usage: Optional[ChatCompletionUsage] = None diff --git a/src/together/types/chat/chat_completion_chunk.py b/src/together/types/chat/chat_completion_chunk.py index df42bc2c..5107833d 100644 --- a/src/together/types/chat/chat_completion_chunk.py +++ b/src/together/types/chat/chat_completion_chunk.py @@ -4,42 +4,52 @@ from typing_extensions import Literal from ..._models import BaseModel +from ..log_probs import LogProbs +from ..tool_choice import ToolChoice from .chat_completion_usage import ChatCompletionUsage -__all__ = ["ChatCompletionChunk", "Token", "Choice", "ChoiceDelta"] +__all__ = ["ChatCompletionChunk", "Choice", "ChoiceDelta", "ChoiceDeltaFunctionCall"] -class Token(BaseModel): - id: int +class ChoiceDeltaFunctionCall(BaseModel): + arguments: str - logprob: float + name: str - special: bool - text: str +class ChoiceDelta(BaseModel): + content: Optional[str] = None + function_call: Optional[ChoiceDeltaFunctionCall] = None -class ChoiceDelta(BaseModel): - content: str + role: Optional[Literal["system", "user", "assistant", "function", "tool"]] = None + + token_id: Optional[int] = None + + tool_calls: Optional[List[ToolChoice]] = None class Choice(BaseModel): delta: ChoiceDelta + finish_reason: Literal["stop", "eos", "length", "tool_calls", "function_call"] + index: int + logprobs: Optional[LogProbs] = None + class ChatCompletionChunk(BaseModel): id: str - token: Token - choices: List[Choice] created: int + model: str + object: Literal["chat.completion.chunk"] - finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls"]] = None + system_fingerprint: Optional[str] = None usage: Optional[ChatCompletionUsage] = None diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index 8b57ef5b..99963d95 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -11,6 +11,8 @@ __all__ = [ "CompletionCreateParamsBase", "Message", + "FunctionCall", + "FunctionCallName", "ResponseFormat", "ToolChoice", "CompletionCreateParamsNonStreaming", @@ -37,6 +39,8 @@ class CompletionCreateParamsBase(TypedDict, total=False): repeating tokens that have already been mentioned. """ + function_call: FunctionCall + logit_bias: Dict[str, float] """Adjusts the likelihood of specific tokens appearing in the generated output.""" @@ -133,6 +137,13 @@ class Message(TypedDict, total=False): """The role of the messages author. 
Choice between: system, user, or assistant.""" +class FunctionCallName(TypedDict, total=False): + name: Required[str] + + +FunctionCall = Union[Literal["none", "auto"], FunctionCallName] + + class ResponseFormat(TypedDict, total=False): schema: Dict[str, str] """The schema of the response format.""" diff --git a/src/together/types/completion.py b/src/together/types/completion.py index b1ea84db..98ce814f 100644 --- a/src/together/types/completion.py +++ b/src/together/types/completion.py @@ -11,7 +11,7 @@ class Choice(BaseModel): - finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls"]] = None + finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None logprobs: Optional[LogProbs] = None diff --git a/src/together/types/fine_tune_event.py b/src/together/types/fine_tune_event.py index 09d6e795..bc98905f 100644 --- a/src/together/types/fine_tune_event.py +++ b/src/together/types/fine_tune_event.py @@ -1,6 +1,5 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins from typing import List, Optional from typing_extensions import Literal @@ -64,7 +63,7 @@ class Data(BaseModel): wandb_url: str - level: Optional[builtins.object] = None + level: Optional[Literal["info", "warning", "error", "legacy_info", "legacy_iwarning", "legacy_ierror"]] = None class FineTuneEvent(BaseModel): diff --git a/src/together/types/log_probs.py b/src/together/types/log_probs.py index a67d3607..a9c408a0 100644 --- a/src/together/types/log_probs.py +++ b/src/together/types/log_probs.py @@ -4,10 +4,18 @@ from .._models import BaseModel -__all__ = ["LogProbs"] +__all__ = ["LogProbs", "Content"] + + +class Content(BaseModel): + token: str + + logprob: float class LogProbs(BaseModel): + content: Optional[List[Content]] = None + token_logprobs: Optional[List[float]] = None """List of token log probabilities""" diff --git a/src/together/types/tool_choice.py b/src/together/types/tool_choice.py new file mode 100644 index 00000000..d48c79c6 --- /dev/null +++ b/src/together/types/tool_choice.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ToolChoice", "Function"] + + +class Function(BaseModel): + arguments: str + + name: str + + +class ToolChoice(BaseModel): + id: str + + function: Function + + index: float + + type: Literal["function"] diff --git a/src/together/types/tool_choice_param.py b/src/together/types/tool_choice_param.py index 241b8471..98b759fe 100644 --- a/src/together/types/tool_choice_param.py +++ b/src/together/types/tool_choice_param.py @@ -2,16 +2,22 @@ from __future__ import annotations -from typing_extensions import TypedDict +from typing_extensions import Literal, Required, TypedDict __all__ = ["ToolChoiceParam", "Function"] class Function(TypedDict, total=False): - name: str + arguments: Required[str] + + name: Required[str] class ToolChoiceParam(TypedDict, total=False): - function: Function + id: Required[str] + + function: Required[Function] + + index: Required[float] - type: str + type: Required[Literal["function"]] diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index f35a3e90..28a2cc9b 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -58,6 +58,7 @@ def test_method_create_with_all_params_overload_1(self, client: Together) -> Non model="mistralai/Mixtral-8x7B-Instruct-v0.1", echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, @@ -203,6 +204,7 @@ def test_method_create_with_all_params_overload_2(self, client: Together) -> Non stream=True, echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, @@ -350,6 +352,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn model="mistralai/Mixtral-8x7B-Instruct-v0.1", echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, @@ -495,6 +498,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn stream=True, echo=True, frequency_penalty=0, + function_call="none", logit_bias={ "105": 21.4, "1024": -10.5, From a25a797f7f7d473ff3f2a939179e6576ec02f891 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 14:04:43 +0000 Subject: [PATCH 05/10] feat(api): manual updates (#6) --- .gitignore | 1 + README.md | 2 +- pyproject.toml | 16 ++++++++++++++ requirements-dev.lock | 10 ++++++++- requirements.lock | 3 ++- src/together/_client.py | 4 ++-- src/together/_models.py | 27 ++++++++++++++++++++++++ src/together/_utils/__init__.py | 5 ++++- src/together/_utils/_reflection.py | 34 ++++++++++++++++++++++++++++++ tests/test_client.py | 4 ++-- 10 files changed, 98 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0f9a66a9..87797408 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.prism.log .vscode _dev diff --git a/README.md b/README.md index 70154594..9aba58bb 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ It is generated with [Stainless](https://www.stainlessapi.com/). ## Documentation -The REST API documentation can be found [on docs.together.ai](https://docs.together.ai/). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found [on docs.together.ai](https://docs.together.ai). The full API of this library can be found in [api.md](api.md). 
## Installation diff --git a/pyproject.toml b/pyproject.toml index b834a03d..3d54bfb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ dev-dependencies = [ "nox", "dirty-equals>=0.6.0", "importlib-metadata>=6.7.0", + "rich>=13.7.1", ] @@ -99,6 +100,21 @@ include = [ [tool.hatch.build.targets.wheel] packages = ["src/together"] +[tool.hatch.build.targets.sdist] +# Basically everything except hidden files/directories (such as .github, .devcontainers, .python-version, etc) +include = [ + "/*.toml", + "/*.json", + "/*.lock", + "/*.md", + "/mypy.ini", + "/noxfile.py", + "bin/*", + "examples/*", + "src/*", + "tests/*", +] + [tool.hatch.metadata.hooks.fancy-pypi-readme] content-type = "text/markdown" diff --git a/requirements-dev.lock b/requirements-dev.lock index 0c449dc6..826c91cb 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -10,7 +10,7 @@ -e file:. annotated-types==0.6.0 # via pydantic -anyio==4.1.0 +anyio==4.4.0 # via httpx # via together argcomplete==3.1.2 @@ -44,6 +44,10 @@ idna==3.4 importlib-metadata==7.0.0 iniconfig==2.0.0 # via pytest +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py mypy==1.7.1 mypy-extensions==1.0.0 # via mypy @@ -63,6 +67,8 @@ pydantic==2.7.1 # via together pydantic-core==2.18.2 # via pydantic +pygments==2.18.0 + # via rich pyright==1.1.364 pytest==7.1.1 # via pytest-asyncio @@ -72,6 +78,7 @@ python-dateutil==2.8.2 pytz==2023.3.post1 # via dirty-equals respx==0.20.2 +rich==13.7.1 ruff==0.1.9 setuptools==68.2.2 # via nodeenv @@ -86,6 +93,7 @@ tomli==2.0.1 # via mypy # via pytest typing-extensions==4.8.0 + # via anyio # via mypy # via pydantic # via pydantic-core diff --git a/requirements.lock b/requirements.lock index d2e2cf38..a6341f0f 100644 --- a/requirements.lock +++ b/requirements.lock @@ -10,7 +10,7 @@ -e file:. annotated-types==0.6.0 # via pydantic -anyio==4.1.0 +anyio==4.4.0 # via httpx # via together certifi==2023.7.22 @@ -38,6 +38,7 @@ sniffio==1.3.0 # via httpx # via together typing-extensions==4.8.0 + # via anyio # via pydantic # via pydantic-core # via together diff --git a/src/together/_client.py b/src/together/_client.py index 77e8d83a..9e4fd0c3 100644 --- a/src/together/_client.py +++ b/src/together/_client.py @@ -131,7 +131,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": api_key} + return {"Authorization": f"Bearer {api_key}"} @property @override @@ -313,7 +313,7 @@ def qs(self) -> Querystring: @override def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": api_key} + return {"Authorization": f"Bearer {api_key}"} @property @override diff --git a/src/together/_models.py b/src/together/_models.py index 75c68cc7..5d95bb4b 100644 --- a/src/together/_models.py +++ b/src/together/_models.py @@ -10,6 +10,7 @@ ClassVar, Protocol, Required, + ParamSpec, TypedDict, TypeGuard, final, @@ -67,6 +68,9 @@ __all__ = ["BaseModel", "GenericModel"] _T = TypeVar("_T") +_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel") + +P = ParamSpec("P") @runtime_checkable @@ -379,6 +383,29 @@ def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericMo return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) +def build( + base_model_cls: Callable[P, _BaseModelT], + *args: P.args, + **kwargs: P.kwargs, +) -> _BaseModelT: + """Construct a BaseModel class without validation. 
+ + This is useful for cases where you need to instantiate a `BaseModel` + from an API response as this provides type-safe params which isn't supported + by helpers like `construct_type()`. + + ```py + build(MyModel, my_field_a="foo", my_field_b=123) + ``` + """ + if args: + raise TypeError( + "Received positional arguments which are not supported; Keyword arguments must be used instead", + ) + + return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs)) + + def construct_type(*, value: object, type_: object) -> object: """Loose coercion to the expected type with construction of nested values. diff --git a/src/together/_utils/__init__.py b/src/together/_utils/__init__.py index 667e2473..3efe66c8 100644 --- a/src/together/_utils/__init__.py +++ b/src/together/_utils/__init__.py @@ -49,4 +49,7 @@ maybe_transform as maybe_transform, async_maybe_transform as async_maybe_transform, ) -from ._reflection import function_has_argument as function_has_argument +from ._reflection import ( + function_has_argument as function_has_argument, + assert_signatures_in_sync as assert_signatures_in_sync, +) diff --git a/src/together/_utils/_reflection.py b/src/together/_utils/_reflection.py index e134f58e..9a53c7bd 100644 --- a/src/together/_utils/_reflection.py +++ b/src/together/_utils/_reflection.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import inspect from typing import Any, Callable @@ -6,3 +8,35 @@ def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool: """Returns whether or not the given function has a specific parameter""" sig = inspect.signature(func) return arg_name in sig.parameters + + +def assert_signatures_in_sync( + source_func: Callable[..., Any], + check_func: Callable[..., Any], + *, + exclude_params: set[str] = set(), +) -> None: + """Ensure that the signature of the second function matches the first.""" + + check_sig = inspect.signature(check_func) + source_sig = inspect.signature(source_func) + + errors: list[str] = [] + + for name, source_param in source_sig.parameters.items(): + if name in exclude_params: + continue + + custom_param = check_sig.parameters.get(name) + if not custom_param: + errors.append(f"the `{name}` param is missing") + continue + + if custom_param.annotation != source_param.annotation: + errors.append( + f"types for the `{name}` param do not match; source={repr(source_param.annotation)} checking={repr(custom_param.annotation)}" + ) + continue + + if errors: + raise AssertionError(f"{len(errors)} errors encountered when comparing signatures:\n\n" + "\n\n".join(errors)) diff --git a/tests/test_client.py b/tests/test_client.py index 2e3679a9..b09a8db0 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -332,7 +332,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = Together(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert request.headers.get("Authorization") == api_key + assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(TogetherError): client2 = Together(base_url=base_url, api_key=None, _strict_response_validation=True) @@ -1048,7 +1048,7 @@ def test_default_headers_option(self) -> None: def test_validate_headers(self) -> None: client = AsyncTogether(base_url=base_url, api_key=api_key, _strict_response_validation=True) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) - assert 
request.headers.get("Authorization") == api_key + assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(TogetherError): client2 = AsyncTogether(base_url=base_url, api_key=None, _strict_response_validation=True) From 6bab9dadd17cacd94565c8f4df25c0ea6f83e987 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 23:44:47 +0000 Subject: [PATCH 06/10] feat(api): OpenAPI spec update via Stainless API (#7) --- .devcontainer/Dockerfile | 2 +- .github/workflows/ci.yml | 4 ++-- .github/workflows/publish-pypi.yml | 4 ++-- .stats.yml | 2 +- requirements-dev.lock | 1 + requirements.lock | 1 + src/together/_base_client.py | 20 +++++++++++++++++-- src/together/resources/embeddings.py | 6 ++++-- src/together/types/embedding_create_params.py | 3 ++- 9 files changed, 32 insertions(+), 11 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 83bca8f7..ac9a2e75 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.24.0" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c339440..257f0561 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.35.0' RYE_INSTALL_OPTION: '--yes' - name: Install dependencies @@ -41,7 +41,7 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.35.0' RYE_INSTALL_OPTION: '--yes' - name: Bootstrap diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 632c0e94..fb499434 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -21,8 +21,8 @@ jobs: curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.35.0' + RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI run: | diff --git a/.stats.yml b/.stats.yml index cf0d3ce7..d40eba14 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-e8f4e11a2e3927c75dce42c913ef5c9adcf2aef3d3b1312b4825d9f135413c39.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9ca35fd97a4194757393da2b0049a2e9900fd837a69afecfe01568b83796e299.yml diff --git a/requirements-dev.lock b/requirements-dev.lock index 826c91cb..63621c40 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,6 +6,7 @@ # features: [] # all-features: true # with-sources: false +# generate-hashes: false -e file:. annotated-types==0.6.0 diff --git a/requirements.lock b/requirements.lock index a6341f0f..5fd6ad03 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,6 +6,7 @@ # features: [] # all-features: true # with-sources: false +# generate-hashes: false -e file:. 
annotated-types==0.6.0 diff --git a/src/together/_base_client.py b/src/together/_base_client.py index a806809b..0f9eb67c 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -58,6 +58,7 @@ HttpxSendArgs, AsyncTransport, RequestOptions, + HttpxRequestFiles, ModelBuilderProtocol, ) from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping @@ -459,6 +460,7 @@ def _build_request( headers = self._build_headers(options) params = _merge_mappings(self.default_query, options.params) content_type = headers.get("Content-Type") + files = options.files # If the given Content-Type header is multipart/form-data then it # has to be removed so that httpx can generate the header with @@ -472,7 +474,7 @@ def _build_request( headers.pop("Content-Type") # As we are now sending multipart/form-data instead of application/json - # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding + # we need to tell httpx to use it, https://www.python-httpx.org/advanced/clients/#multipart-file-encoding if json_data: if not is_dict(json_data): raise TypeError( @@ -480,6 +482,15 @@ def _build_request( ) kwargs["data"] = self._serialize_multipartform(json_data) + # httpx determines whether or not to send a "multipart/form-data" + # request based on the truthiness of the "files" argument. + # This gets around that issue by generating a dict value that + # evaluates to true. + # + # https://github.com/encode/httpx/discussions/2399#discussioncomment-3814186 + if not files: + files = cast(HttpxRequestFiles, ForceMultipartDict()) + # TODO: report this error to httpx return self._client.build_request( # pyright: ignore[reportUnknownMemberType] headers=headers, @@ -492,7 +503,7 @@ def _build_request( # https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, json=json_data, - files=options.files, + files=files, **kwargs, ) @@ -1863,6 +1874,11 @@ def make_request_options( return options +class ForceMultipartDict(Dict[str, None]): + def __bool__(self) -> bool: + return True + + class OtherPlatform: def __init__(self, name: str) -> None: self.name = name diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py index 290c574d..d1fda42c 100644 --- a/src/together/resources/embeddings.py +++ b/src/together/resources/embeddings.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import List, Union + import httpx from ..types import embedding_create_params @@ -38,7 +40,7 @@ def with_streaming_response(self) -> EmbeddingsResourceWithStreamingResponse: def create( self, *, - input: str, + input: Union[str, List[str]], model: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -91,7 +93,7 @@ def with_streaming_response(self) -> AsyncEmbeddingsResourceWithStreamingRespons async def create( self, *, - input: str, + input: Union[str, List[str]], model: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
diff --git a/src/together/types/embedding_create_params.py b/src/together/types/embedding_create_params.py index 11f45431..ed80b3e6 100644 --- a/src/together/types/embedding_create_params.py +++ b/src/together/types/embedding_create_params.py @@ -2,13 +2,14 @@ from __future__ import annotations +from typing import List, Union from typing_extensions import Required, TypedDict __all__ = ["EmbeddingCreateParams"] class EmbeddingCreateParams(TypedDict, total=False): - input: Required[str] + input: Required[Union[str, List[str]]] """A string providing the text for the model to embed.""" model: Required[str] From a7584db12d26cc55833ade61dae8ec29878d5ed1 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:48:54 +0000 Subject: [PATCH 07/10] feat(api): OpenAPI spec update via Stainless API (#8) --- .stats.yml | 2 +- src/together/_base_client.py | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.stats.yml b/.stats.yml index d40eba14..f52da739 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9ca35fd97a4194757393da2b0049a2e9900fd837a69afecfe01568b83796e299.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9bc2a14025495e7ec3f0959f90457cdb215d4fe285151c29dcff03a47796d33a.yml diff --git a/src/together/_base_client.py b/src/together/_base_client.py index 0f9eb67c..3a1eacee 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -955,6 +955,11 @@ def _request( stream: bool, stream_cls: type[_StreamT] | None, ) -> ResponseT | _StreamT: + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + cast_to = self._maybe_override_cast_to(cast_to, options) self._prepare_options(options) @@ -979,7 +984,7 @@ def _request( if retries > 0: return self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -994,7 +999,7 @@ def _request( if retries > 0: return self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -1022,7 +1027,7 @@ def _request( if retries > 0 and self._should_retry(err.response): err.response.close() return self._retry_request( - options, + input_options, cast_to, retries, err.response.headers, @@ -1518,6 +1523,11 @@ async def _request( # execute it earlier while we are in an async context self._platform = await asyncify(get_platform)() + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + cast_to = self._maybe_override_cast_to(cast_to, options) await self._prepare_options(options) @@ -1540,7 +1550,7 @@ async def _request( if retries > 0: return await self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -1555,7 +1565,7 @@ async def _request( if retries > 0: return await self._retry_request( - options, + input_options, cast_to, retries, stream=stream, @@ -1578,7 +1588,7 @@ async def _request( if retries > 0 and self._should_retry(err.response): await err.response.aclose() return await self._retry_request( - options, + input_options, cast_to, retries, err.response.headers, From 
04877a01b5a9dd3988ff8283c665fad4ca0c643a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:51:12 +0000 Subject: [PATCH 08/10] feat(api): OpenAPI spec update via Stainless API (#9) --- .github/workflows/ci.yml | 1 + .stats.yml | 2 +- README.md | 2 +- requirements-dev.lock | 2 +- src/together/_base_client.py | 12 ++++++------ src/together/_compat.py | 6 +++--- src/together/_models.py | 8 ++++++++ src/together/types/chat/chat_completion_usage.py | 1 - 8 files changed, 21 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 257f0561..40293964 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,7 @@ on: pull_request: branches: - main + - next jobs: lint: diff --git a/.stats.yml b/.stats.yml index f52da739..29957ee8 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-9bc2a14025495e7ec3f0959f90457cdb215d4fe285151c29dcff03a47796d33a.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-bd614fbf1c23ddda5eba5554c84321d7c0e462f19465660c259d50c242fe8be8.yml diff --git a/README.md b/README.md index 9aba58bb..c993f3e1 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ It is generated with [Stainless](https://www.stainlessapi.com/). ## Documentation -The REST API documentation can be found [on docs.together.ai](https://docs.together.ai). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found on [docs.together.ai](https://docs.together.ai). The full API of this library can be found in [api.md](api.md). 
## Installation diff --git a/requirements-dev.lock b/requirements-dev.lock index 63621c40..78809391 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -49,7 +49,7 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -mypy==1.7.1 +mypy==1.10.1 mypy-extensions==1.0.0 # via mypy nodeenv==1.8.0 diff --git a/src/together/_base_client.py b/src/together/_base_client.py index 3a1eacee..8a36d721 100644 --- a/src/together/_base_client.py +++ b/src/together/_base_client.py @@ -879,9 +879,9 @@ def __exit__( def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options def _prepare_request( self, @@ -961,7 +961,7 @@ def _request( input_options = model_copy(options) cast_to = self._maybe_override_cast_to(cast_to, options) - self._prepare_options(options) + options = self._prepare_options(options) retries = self._remaining_retries(remaining_retries, options) request = self._build_request(options) @@ -1442,9 +1442,9 @@ async def __aexit__( async def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options async def _prepare_request( self, @@ -1529,7 +1529,7 @@ async def _request( input_options = model_copy(options) cast_to = self._maybe_override_cast_to(cast_to, options) - await self._prepare_options(options) + options = await self._prepare_options(options) retries = self._remaining_retries(remaining_retries, options) request = self._build_request(options) diff --git a/src/together/_compat.py b/src/together/_compat.py index 74c7639b..c919b5ad 100644 --- a/src/together/_compat.py +++ b/src/together/_compat.py @@ -118,10 +118,10 @@ def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: return model.__fields__ # type: ignore -def model_copy(model: _ModelT) -> _ModelT: +def model_copy(model: _ModelT, *, deep: bool = False) -> _ModelT: if PYDANTIC_V2: - return model.model_copy() - return model.copy() # type: ignore + return model.model_copy(deep=deep) + return model.copy(deep=deep) # type: ignore def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: diff --git a/src/together/_models.py b/src/together/_models.py index 5d95bb4b..eb7ce3bd 100644 --- a/src/together/_models.py +++ b/src/together/_models.py @@ -643,6 +643,14 @@ def validate_type(*, type_: type[_T], value: object) -> _T: return cast(_T, _validate_non_model_type(type_=type_, value=value)) +def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None: + """Add a pydantic config for the given type. + + Note: this is a no-op on Pydantic v1. + """ + setattr(typ, "__pydantic_config__", config) # noqa: B010 + + # our use of subclasssing here causes weirdness for type checkers, # so we just pretend that we don't subclass if TYPE_CHECKING: diff --git a/src/together/types/chat/chat_completion_usage.py b/src/together/types/chat/chat_completion_usage.py index 5e804ab5..82b9d450 100644 --- a/src/together/types/chat/chat_completion_usage.py +++ b/src/together/types/chat/chat_completion_usage.py @@ -1,7 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from ..._models import BaseModel __all__ = ["ChatCompletionUsage"] From af93a5c78aaf2b9bf7f3c42f7ff19e06472ae5de Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:54:44 +0000 Subject: [PATCH 09/10] feat(api): OpenAPI spec update via Stainless API (#10) --- .stats.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.stats.yml b/.stats.yml index 29957ee8..229d59da 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 15 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-bd614fbf1c23ddda5eba5554c84321d7c0e462f19465660c259d50c242fe8be8.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2FTogetherAI-aa19594b663913393bdbc1b56903615e4eb84c6ebc60617ab2f451ede8a730c2.yml From b3b092c2a0cb4740cd7ec53804bcdaa19d006858 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:55:08 +0000 Subject: [PATCH 10/10] release: 0.1.0-alpha.1 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 71 +++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/together/_version.py | 2 +- 4 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 CHANGELOG.md diff --git a/.release-please-manifest.json b/.release-please-manifest.json index c4762802..ba6c3483 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.0.1-alpha.0" + ".": "0.1.0-alpha.1" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..7b8f8381 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,71 @@ +# Changelog + +## 0.1.0-alpha.1 (2024-07-16) + +Full Changelog: [v0.0.1-alpha.0...v0.1.0-alpha.1](https://github.com/togethercomputer/together-py/compare/v0.0.1-alpha.0...v0.1.0-alpha.1) + +### Features + +* **api:** Config update for pstern-sl/dev ([0a841c4](https://github.com/togethercomputer/together-py/commit/0a841c447d833ee2a6008db4b2ddd4b75eb47fbd)) +* **api:** manual updates ([d43927b](https://github.com/togethercomputer/together-py/commit/d43927b37622bb7d233a178eceb21b2223bba1bc)) +* **api:** manual updates ([94cfef7](https://github.com/togethercomputer/together-py/commit/94cfef7ff7d499fc2e8dd7b1ad4fed9e908cd28a)) +* **api:** manual updates ([#6](https://github.com/togethercomputer/together-py/issues/6)) ([a25a797](https://github.com/togethercomputer/together-py/commit/a25a797f7f7d473ff3f2a939179e6576ec02f891)) +* **api:** OpenAPI spec update via Stainless API ([a78681d](https://github.com/togethercomputer/together-py/commit/a78681d3a8ea469844936ac4793f0a374a4d1af1)) +* **api:** OpenAPI spec update via Stainless API ([9d54568](https://github.com/togethercomputer/together-py/commit/9d54568072bbaef6b99bd0fbc54c451144f2e1f5)) +* **api:** OpenAPI spec update via Stainless API ([00c8693](https://github.com/togethercomputer/together-py/commit/00c86934ed1ab85f0ed1cbc5ecb069d94366b2cd)) +* **api:** OpenAPI spec update via Stainless API ([8609a6e](https://github.com/togethercomputer/together-py/commit/8609a6e8d13b50bf22ec67d0149c9ab51f5dea0e)) +* **api:** OpenAPI spec update via Stainless API ([3dc55d1](https://github.com/togethercomputer/together-py/commit/3dc55d1f4cd41e5a4054bd2a43a5595373db150c)) +* **api:** OpenAPI spec update via Stainless API 
([add76c7](https://github.com/togethercomputer/together-py/commit/add76c7c0ef977dadc3b23f54c784a7f62b81528)) +* **api:** OpenAPI spec update via Stainless API ([5eaa129](https://github.com/togethercomputer/together-py/commit/5eaa1290359411361b99008695d2c786507d2073)) +* **api:** OpenAPI spec update via Stainless API ([d229eef](https://github.com/togethercomputer/together-py/commit/d229eeffe4022374b4d2fd9df208afe4c0fd21bb)) +* **api:** OpenAPI spec update via Stainless API ([643f5cf](https://github.com/togethercomputer/together-py/commit/643f5cfc1d6c3d4d1c77e2c6f27411c5df0845df)) +* **api:** OpenAPI spec update via Stainless API ([9ae4e1b](https://github.com/togethercomputer/together-py/commit/9ae4e1bf74193c6cc8d1509f3b05d816e5e071b4)) +* **api:** OpenAPI spec update via Stainless API ([#10](https://github.com/togethercomputer/together-py/issues/10)) ([af93a5c](https://github.com/togethercomputer/together-py/commit/af93a5c78aaf2b9bf7f3c42f7ff19e06472ae5de)) +* **api:** OpenAPI spec update via Stainless API ([#3](https://github.com/togethercomputer/together-py/issues/3)) ([cd703fb](https://github.com/togethercomputer/together-py/commit/cd703fbdb178f4f05ffc43af0e86f5218537ce5c)) +* **api:** OpenAPI spec update via Stainless API ([#4](https://github.com/togethercomputer/together-py/issues/4)) ([00ef6cc](https://github.com/togethercomputer/together-py/commit/00ef6cc33f844ef3d214e805f3bdfa28240905b7)) +* **api:** OpenAPI spec update via Stainless API ([#5](https://github.com/togethercomputer/together-py/issues/5)) ([3e9827b](https://github.com/togethercomputer/together-py/commit/3e9827b08f2698029e31df3d770d7f873b9d610d)) +* **api:** OpenAPI spec update via Stainless API ([#7](https://github.com/togethercomputer/together-py/issues/7)) ([6bab9da](https://github.com/togethercomputer/together-py/commit/6bab9dadd17cacd94565c8f4df25c0ea6f83e987)) +* **api:** OpenAPI spec update via Stainless API ([#8](https://github.com/togethercomputer/together-py/issues/8)) ([a7584db](https://github.com/togethercomputer/together-py/commit/a7584db12d26cc55833ade61dae8ec29878d5ed1)) +* **api:** OpenAPI spec update via Stainless API ([#9](https://github.com/togethercomputer/together-py/issues/9)) ([04877a0](https://github.com/togethercomputer/together-py/commit/04877a01b5a9dd3988ff8283c665fad4ca0c643a)) +* **api:** rename api key ([b7b55e6](https://github.com/togethercomputer/together-py/commit/b7b55e632590fbe2425be79f332352ba8367e365)) +* **api:** update via SDK Studio ([5866250](https://github.com/togethercomputer/together-py/commit/58662506963afd2ed777fa3efa9f35263689437c)) +* **api:** update via SDK Studio ([27bbc3c](https://github.com/togethercomputer/together-py/commit/27bbc3c53d9e8849d7e7099bee417ef99260eece)) +* **api:** update via SDK Studio ([f7c11ec](https://github.com/togethercomputer/together-py/commit/f7c11ecec9f83889385b710e8270f9159f013bb1)) +* **api:** update via SDK Studio ([22a5f1f](https://github.com/togethercomputer/together-py/commit/22a5f1f01c5dea75a28763bcb991e5276ed9efa4)) +* **api:** update via SDK Studio ([159534b](https://github.com/togethercomputer/together-py/commit/159534b4efeabd8f445037f38af6acd4342c7e7f)) +* **api:** update via SDK Studio ([30663ec](https://github.com/togethercomputer/together-py/commit/30663ec91f215ba7135dd8723e2876cf1bf70dde)) +* **api:** update via SDK Studio ([6561269](https://github.com/togethercomputer/together-py/commit/6561269416ba964bc0b2d452474017cd8036d666)) +* **api:** update via SDK Studio 
([72bad68](https://github.com/togethercomputer/together-py/commit/72bad68007c5e595fa65bcff9e268aca93cb0bef)) +* **api:** update via SDK Studio ([59cce01](https://github.com/togethercomputer/together-py/commit/59cce011f234371b089e375cca57f9984ead2a8e)) +* **api:** update via SDK Studio ([b2b0177](https://github.com/togethercomputer/together-py/commit/b2b017748247196d975cdbc51c4fe5bea23b5bbf)) +* **api:** update via SDK Studio ([331cc46](https://github.com/togethercomputer/together-py/commit/331cc4626448b1e5546ae11c4bd0b90f106094c6)) +* **api:** update via SDK Studio ([6a57974](https://github.com/togethercomputer/together-py/commit/6a57974a5ae311f3f0faa917191964c09579c7bd)) +* **api:** update via SDK Studio ([80c35ee](https://github.com/togethercomputer/together-py/commit/80c35ee69b20f6a9b78512be0344e71e0850bb29)) +* **api:** update via SDK Studio ([668c023](https://github.com/togethercomputer/together-py/commit/668c02366615c5b073b29b03e45ae17ffe668bca)) +* **api:** update via SDK Studio ([a592cff](https://github.com/togethercomputer/together-py/commit/a592cffcc08f9831bdd414168b2e57b45ce42c08)) +* **api:** update via SDK Studio ([733f0b0](https://github.com/togethercomputer/together-py/commit/733f0b0917d8627014c2106a510a4b1322fb8927)) +* **api:** update via SDK Studio ([5095404](https://github.com/togethercomputer/together-py/commit/50954043bcc19bad0ffc23207e8074fcc83a6212)) +* **api:** update via SDK Studio ([d3b6a64](https://github.com/togethercomputer/together-py/commit/d3b6a6403251badab836ff9a75d060afb97440cb)) +* **api:** update via SDK Studio ([adf918b](https://github.com/togethercomputer/together-py/commit/adf918b5c13d36d086d42847a249df124cda119b)) +* **api:** update via SDK Studio ([a79da8e](https://github.com/togethercomputer/together-py/commit/a79da8ea98ed471fc23af36c30696fb910cc6657)) +* **api:** update via SDK Studio ([44b426f](https://github.com/togethercomputer/together-py/commit/44b426fca286acecfbe37b1cef802f40ba73496e)) +* **api:** update via SDK Studio ([1f7c7fe](https://github.com/togethercomputer/together-py/commit/1f7c7fe55e6c728c97df57147f5ae9c072f76e3b)) +* **api:** update via SDK Studio ([500e41b](https://github.com/togethercomputer/together-py/commit/500e41b1eb4c960d5e14fe069251ef887f0e4976)) +* **api:** update via SDK Studio ([ca665ed](https://github.com/togethercomputer/together-py/commit/ca665edb80300b97e269976e3f966308afc50e4a)) +* **api:** updates ([3591c56](https://github.com/togethercomputer/together-py/commit/3591c56336cd5a7cd98c23feed5ae5fc737bcafb)) +* update via SDK Studio ([c56e7d1](https://github.com/togethercomputer/together-py/commit/c56e7d1b19533d687c1dd23d35118546699be8b7)) +* update via SDK Studio ([90adf12](https://github.com/togethercomputer/together-py/commit/90adf128d816a262f51c4dcc4a39b6693c7c746f)) +* update via SDK Studio ([b75aa7f](https://github.com/togethercomputer/together-py/commit/b75aa7f8c46573e6047abc7f9bd03bcc6d90cfe7)) +* update via SDK Studio ([48c9e19](https://github.com/togethercomputer/together-py/commit/48c9e1941baade2916cd4bf56becc42e35052d3a)) +* update via SDK Studio ([592853d](https://github.com/togethercomputer/together-py/commit/592853d727033ea9421ed58576ae15325aca535f)) +* update via SDK Studio ([611badd](https://github.com/togethercomputer/together-py/commit/611baddd1f735c4287e052798812a23f61213717)) +* update via SDK Studio ([a84defc](https://github.com/togethercomputer/together-py/commit/a84defc9ab5274d5eafc9190055083322b8fb93f)) +* update via SDK Studio 
([3c83f12](https://github.com/togethercomputer/together-py/commit/3c83f120ee2b10c4ec2c0e359eaf9f1968f85dcb)) +* update via SDK Studio ([67d01b0](https://github.com/togethercomputer/together-py/commit/67d01b03b05ee598539b68d70185192862fb0a29)) +* update via SDK Studio ([065b990](https://github.com/togethercomputer/together-py/commit/065b9903a0c0e9eb67a591d51abbb27e08020ef5)) + + +### Chores + +* go live ([#1](https://github.com/togethercomputer/together-py/issues/1)) ([9c9e672](https://github.com/togethercomputer/together-py/commit/9c9e67276776b7169bd2e9066c6049f5237ed044)) +* update SDK settings ([e082ad6](https://github.com/togethercomputer/together-py/commit/e082ad6d7beff79ae5301f63d7b334aeebc12024)) diff --git a/pyproject.toml b/pyproject.toml index 3d54bfb5..d771f67d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "together" -version = "0.0.1-alpha.0" +version = "0.1.0-alpha.1" description = "The official Python library for the together API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/together/_version.py b/src/together/_version.py index e4f5dac8..2b114bd1 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "together" -__version__ = "0.0.1-alpha.0" # x-release-please-version +__version__ = "0.1.0-alpha.1" # x-release-please-version