From 4c3b732d55c852ba6b45cbed189dc729e660d6e2 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 21 Oct 2025 23:37:10 +0000 Subject: [PATCH 1/2] feat(api): Rename evaluation sdks to evals --- .stats.yml | 2 +- MIGRATION.md | 2 +- api.md | 30 +- src/client.ts | 39 +-- src/resources/{evaluation.ts => evals.ts} | 261 ++++++++++++++++-- src/resources/evaluations.ts | 235 ---------------- src/resources/index.ts | 17 +- .../{evaluations.test.ts => evals.test.ts} | 30 +- tests/api-resources/evaluation.test.ts | 32 --- 9 files changed, 303 insertions(+), 345 deletions(-) rename src/resources/{evaluation.ts => evals.ts} (54%) delete mode 100644 src/resources/evaluations.ts rename tests/api-resources/{evaluations.test.ts => evals.test.ts} (53%) delete mode 100644 tests/api-resources/evaluation.test.ts diff --git a/.stats.yml b/.stats.yml index 0c41fb15..6bc34489 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 41 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-bf2f8f13aa9e13553634ad6efce85e7fa77302f9ddcf20f7b91f6ca1c85667ca.yml openapi_spec_hash: 2306f08b88e1f59c5750f4573f003fdc -config_hash: cf71a25b7e10f3a39a183524fa0ba05c +config_hash: 73457be4d72f0bf4c22de49f2b2d4ec3 diff --git a/MIGRATION.md b/MIGRATION.md index fb1cb8cc..a811c2ff 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -69,7 +69,7 @@ This affects the following methods: - `client.endpoints.list()` - `client.hardware.list()` -- `client.evaluations.list()` +- `client.evals.list()` ### Removed `httpAgent` in favor of `fetchOptions` diff --git a/api.md b/api.md index 259561c4..4ee67829 100644 --- a/api.md +++ b/api.md @@ -248,28 +248,20 @@ Methods: - client.batches.retrieve(id) -> BatchRetrieveResponse - client.batches.list() -> BatchListResponse -# Evaluation +# Evals Types: -- EvaluationJudgeModelConfig -- EvaluationModelRequest -- EvaluationRetrieveResponse -- EvaluationGetStatusResponse +- EvaluationJudgeModelConfig +- EvaluationModelRequest +- EvalRetrieveResponse +- EvalListResponse +- EvalGetAllowedModelsResponse +- EvalGetStatusResponse Methods: -- client.evaluation.retrieve(id) -> EvaluationRetrieveResponse -- client.evaluation.getStatus(id) -> EvaluationGetStatusResponse - -# Evaluations - -Types: - -- EvaluationListResponse -- EvaluationGetAllowedModelsResponse - -Methods: - -- client.evaluations.list({ ...params }) -> EvaluationListResponse -- client.evaluations.getAllowedModels() -> EvaluationGetAllowedModelsResponse +- client.evals.retrieve(id) -> EvalRetrieveResponse +- client.evals.list({ ...params }) -> EvalListResponse +- client.evals.getAllowedModels() -> EvalGetAllowedModelsResponse +- client.evals.getStatus(id) -> EvalGetStatusResponse diff --git a/src/client.ts b/src/client.ts index 9e14084c..18fcd3ad 100644 --- a/src/client.ts +++ b/src/client.ts @@ -49,18 +49,15 @@ import { Endpoints, } from './resources/endpoints'; import { - Evaluation, - EvaluationGetStatusResponse, + EvalGetAllowedModelsResponse, + EvalGetStatusResponse, + EvalListParams, + EvalListResponse, + EvalRetrieveResponse, + Evals, EvaluationJudgeModelConfig, EvaluationModelRequest, - EvaluationRetrieveResponse, -} from './resources/evaluation'; -import { - EvaluationGetAllowedModelsResponse, - EvaluationListParams, - EvaluationListResponse, - Evaluations, -} from './resources/evaluations'; +} from './resources/evals'; import { FileDeleteResponse, FileListResponse, @@ -843,8 +840,7 @@ export class Together { endpoints: API.Endpoints = new API.Endpoints(this); hardware: API.Hardware = new API.Hardware(this); batches: API.Batches = new API.Batches(this); - evaluation: API.Evaluation = new API.Evaluation(this); - evaluations: API.Evaluations = new API.Evaluations(this); + evals: API.Evals = new API.Evals(this); } Together.Chat = Chat; @@ -861,8 +857,7 @@ Together.Jobs = Jobs; Together.Endpoints = Endpoints; Together.Hardware = Hardware; Together.Batches = Batches; -Together.Evaluation = Evaluation; -Together.Evaluations = Evaluations; +Together.Evals = Evals; export declare namespace Together { export type RequestOptions = Opts.RequestOptions; @@ -992,17 +987,13 @@ export declare namespace Together { }; export { - Evaluation as Evaluation, + Evals as Evals, type EvaluationJudgeModelConfig as EvaluationJudgeModelConfig, type EvaluationModelRequest as EvaluationModelRequest, - type EvaluationRetrieveResponse as EvaluationRetrieveResponse, - type EvaluationGetStatusResponse as EvaluationGetStatusResponse, - }; - - export { - Evaluations as Evaluations, - type EvaluationListResponse as EvaluationListResponse, - type EvaluationGetAllowedModelsResponse as EvaluationGetAllowedModelsResponse, - type EvaluationListParams as EvaluationListParams, + type EvalRetrieveResponse as EvalRetrieveResponse, + type EvalListResponse as EvalListResponse, + type EvalGetAllowedModelsResponse as EvalGetAllowedModelsResponse, + type EvalGetStatusResponse as EvalGetStatusResponse, + type EvalListParams as EvalListParams, }; } diff --git a/src/resources/evaluation.ts b/src/resources/evals.ts similarity index 54% rename from src/resources/evaluation.ts rename to src/resources/evals.ts index 6067de4e..eecb07e8 100644 --- a/src/resources/evaluation.ts +++ b/src/resources/evals.ts @@ -5,18 +5,35 @@ import { APIPromise } from '../core/api-promise'; import { RequestOptions } from '../internal/request-options'; import { path } from '../internal/utils/path'; -export class Evaluation extends APIResource { +export class Evals extends APIResource { /** * Get details of a specific evaluation job */ - retrieve(id: string, options?: RequestOptions): APIPromise { + retrieve(id: string, options?: RequestOptions): APIPromise { return this._client.get(path`/evaluation/${id}`, options); } + /** + * Get a list of evaluation jobs with optional filtering + */ + list( + query: EvalListParams | null | undefined = {}, + options?: RequestOptions, + ): APIPromise { + return this._client.get('/evaluations', { query, ...options }); + } + + /** + * Get the list of models that are allowed for evaluation + */ + getAllowedModels(options?: RequestOptions): APIPromise { + return this._client.get('/evaluations/model-list', options); + } + /** * Get the status and results of a specific evaluation job */ - getStatus(id: string, options?: RequestOptions): APIPromise { + getStatus(id: string, options?: RequestOptions): APIPromise { return this._client.get(path`/evaluation/${id}/status`, options); } } @@ -60,7 +77,7 @@ export interface EvaluationModelRequest { temperature: number; } -export interface EvaluationRetrieveResponse { +export interface EvalRetrieveResponse { /** * When the job was created */ @@ -80,10 +97,10 @@ export interface EvaluationRetrieveResponse { * Results of the evaluation (when completed) */ results?: - | EvaluationRetrieveResponse.EvaluationClassifyResults - | EvaluationRetrieveResponse.EvaluationScoreResults - | EvaluationRetrieveResponse.EvaluationCompareResults - | EvaluationRetrieveResponse.Error + | EvalRetrieveResponse.EvaluationClassifyResults + | EvalRetrieveResponse.EvaluationScoreResults + | EvalRetrieveResponse.EvaluationCompareResults + | EvalRetrieveResponse.Error | null; /** @@ -94,7 +111,7 @@ export interface EvaluationRetrieveResponse { /** * History of status updates (admin only) */ - status_updates?: Array; + status_updates?: Array; /** * The type of evaluation @@ -112,7 +129,7 @@ export interface EvaluationRetrieveResponse { workflow_id?: string; } -export namespace EvaluationRetrieveResponse { +export namespace EvalRetrieveResponse { export interface EvaluationClassifyResults { /** * Number of failed generations. @@ -243,18 +260,209 @@ export namespace EvaluationRetrieveResponse { } } -export interface EvaluationGetStatusResponse { +export type EvalListResponse = Array; + +export namespace EvalListResponse { + export interface EvalListResponseItem { + /** + * When the job was created + */ + created_at?: string; + + /** + * ID of the job owner (admin only) + */ + owner_id?: string; + + /** + * The parameters used for this evaluation + */ + parameters?: { [key: string]: unknown }; + + /** + * Results of the evaluation (when completed) + */ + results?: + | EvalListResponseItem.EvaluationClassifyResults + | EvalListResponseItem.EvaluationScoreResults + | EvalListResponseItem.EvaluationCompareResults + | EvalListResponseItem.Error + | null; + + /** + * Current status of the job + */ + status?: 'pending' | 'queued' | 'running' | 'completed' | 'error' | 'user_error'; + + /** + * History of status updates (admin only) + */ + status_updates?: Array; + + /** + * The type of evaluation + */ + type?: 'classify' | 'score' | 'compare'; + + /** + * When the job was last updated + */ + updated_at?: string; + + /** + * The evaluation job ID + */ + workflow_id?: string; + } + + export namespace EvalListResponseItem { + export interface EvaluationClassifyResults { + /** + * Number of failed generations. + */ + generation_fail_count?: number | null; + + /** + * Number of invalid labels + */ + invalid_label_count?: number | null; + + /** + * Number of failed judge generations + */ + judge_fail_count?: number | null; + + /** + * JSON string representing label counts + */ + label_counts?: string; + + /** + * Pecentage of pass labels. + */ + pass_percentage?: number | null; + + /** + * Data File ID + */ + result_file_id?: string; + } + + export interface EvaluationScoreResults { + aggregated_scores?: EvaluationScoreResults.AggregatedScores; + + /** + * number of failed samples generated from model + */ + failed_samples?: number; + + /** + * Number of failed generations. + */ + generation_fail_count?: number | null; + + /** + * number of invalid scores generated from model + */ + invalid_score_count?: number; + + /** + * Number of failed judge generations + */ + judge_fail_count?: number | null; + + /** + * Data File ID + */ + result_file_id?: string; + } + + export namespace EvaluationScoreResults { + export interface AggregatedScores { + mean_score?: number; + + pass_percentage?: number; + + std_score?: number; + } + } + + export interface EvaluationCompareResults { + /** + * Number of times model A won + */ + A_wins?: number; + + /** + * Number of times model B won + */ + B_wins?: number; + + /** + * Number of failed generations. + */ + generation_fail_count?: number | null; + + /** + * Number of failed judge generations + */ + judge_fail_count?: number | null; + + /** + * Total number of samples compared + */ + num_samples?: number; + + /** + * Data File ID + */ + result_file_id?: string; + + /** + * Number of ties + */ + Ties?: number; + } + + export interface Error { + error?: string; + } + + export interface StatusUpdate { + /** + * Additional message for this update + */ + message?: string; + + /** + * The status at this update + */ + status?: string; + + /** + * When this update occurred + */ + timestamp?: string; + } + } +} + +export interface EvalGetAllowedModelsResponse { + model_list?: Array; +} + +export interface EvalGetStatusResponse { results?: - | EvaluationGetStatusResponse.EvaluationClassifyResults - | EvaluationGetStatusResponse.EvaluationScoreResults - | EvaluationGetStatusResponse.EvaluationCompareResults - | EvaluationGetStatusResponse.Error + | EvalGetStatusResponse.EvaluationClassifyResults + | EvalGetStatusResponse.EvaluationScoreResults + | EvalGetStatusResponse.EvaluationCompareResults + | EvalGetStatusResponse.Error | null; status?: 'pending' | 'queued' | 'running' | 'completed' | 'error' | 'user_error'; } -export namespace EvaluationGetStatusResponse { +export namespace EvalGetStatusResponse { export interface EvaluationClassifyResults { /** * Number of failed generations. @@ -368,11 +576,26 @@ export namespace EvaluationGetStatusResponse { } } -export declare namespace Evaluation { +export interface EvalListParams { + /** + * Maximum number of results to return (max 100) + */ + limit?: number; + + /** + * Filter by job status + */ + status?: 'pending' | 'queued' | 'running' | 'completed' | 'error' | 'user_error'; +} + +export declare namespace Evals { export { type EvaluationJudgeModelConfig as EvaluationJudgeModelConfig, type EvaluationModelRequest as EvaluationModelRequest, - type EvaluationRetrieveResponse as EvaluationRetrieveResponse, - type EvaluationGetStatusResponse as EvaluationGetStatusResponse, + type EvalRetrieveResponse as EvalRetrieveResponse, + type EvalListResponse as EvalListResponse, + type EvalGetAllowedModelsResponse as EvalGetAllowedModelsResponse, + type EvalGetStatusResponse as EvalGetStatusResponse, + type EvalListParams as EvalListParams, }; } diff --git a/src/resources/evaluations.ts b/src/resources/evaluations.ts deleted file mode 100644 index b4df08ae..00000000 --- a/src/resources/evaluations.ts +++ /dev/null @@ -1,235 +0,0 @@ -// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -import { APIResource } from '../core/resource'; -import { APIPromise } from '../core/api-promise'; -import { RequestOptions } from '../internal/request-options'; - -export class Evaluations extends APIResource { - /** - * Get a list of evaluation jobs with optional filtering - */ - list( - query: EvaluationListParams | null | undefined = {}, - options?: RequestOptions, - ): APIPromise { - return this._client.get('/evaluations', { query, ...options }); - } - - /** - * Get the list of models that are allowed for evaluation - */ - getAllowedModels(options?: RequestOptions): APIPromise { - return this._client.get('/evaluations/model-list', options); - } -} - -export type EvaluationListResponse = Array; - -export namespace EvaluationListResponse { - export interface EvaluationListResponseItem { - /** - * When the job was created - */ - created_at?: string; - - /** - * ID of the job owner (admin only) - */ - owner_id?: string; - - /** - * The parameters used for this evaluation - */ - parameters?: { [key: string]: unknown }; - - /** - * Results of the evaluation (when completed) - */ - results?: - | EvaluationListResponseItem.EvaluationClassifyResults - | EvaluationListResponseItem.EvaluationScoreResults - | EvaluationListResponseItem.EvaluationCompareResults - | EvaluationListResponseItem.Error - | null; - - /** - * Current status of the job - */ - status?: 'pending' | 'queued' | 'running' | 'completed' | 'error' | 'user_error'; - - /** - * History of status updates (admin only) - */ - status_updates?: Array; - - /** - * The type of evaluation - */ - type?: 'classify' | 'score' | 'compare'; - - /** - * When the job was last updated - */ - updated_at?: string; - - /** - * The evaluation job ID - */ - workflow_id?: string; - } - - export namespace EvaluationListResponseItem { - export interface EvaluationClassifyResults { - /** - * Number of failed generations. - */ - generation_fail_count?: number | null; - - /** - * Number of invalid labels - */ - invalid_label_count?: number | null; - - /** - * Number of failed judge generations - */ - judge_fail_count?: number | null; - - /** - * JSON string representing label counts - */ - label_counts?: string; - - /** - * Pecentage of pass labels. - */ - pass_percentage?: number | null; - - /** - * Data File ID - */ - result_file_id?: string; - } - - export interface EvaluationScoreResults { - aggregated_scores?: EvaluationScoreResults.AggregatedScores; - - /** - * number of failed samples generated from model - */ - failed_samples?: number; - - /** - * Number of failed generations. - */ - generation_fail_count?: number | null; - - /** - * number of invalid scores generated from model - */ - invalid_score_count?: number; - - /** - * Number of failed judge generations - */ - judge_fail_count?: number | null; - - /** - * Data File ID - */ - result_file_id?: string; - } - - export namespace EvaluationScoreResults { - export interface AggregatedScores { - mean_score?: number; - - pass_percentage?: number; - - std_score?: number; - } - } - - export interface EvaluationCompareResults { - /** - * Number of times model A won - */ - A_wins?: number; - - /** - * Number of times model B won - */ - B_wins?: number; - - /** - * Number of failed generations. - */ - generation_fail_count?: number | null; - - /** - * Number of failed judge generations - */ - judge_fail_count?: number | null; - - /** - * Total number of samples compared - */ - num_samples?: number; - - /** - * Data File ID - */ - result_file_id?: string; - - /** - * Number of ties - */ - Ties?: number; - } - - export interface Error { - error?: string; - } - - export interface StatusUpdate { - /** - * Additional message for this update - */ - message?: string; - - /** - * The status at this update - */ - status?: string; - - /** - * When this update occurred - */ - timestamp?: string; - } - } -} - -export interface EvaluationGetAllowedModelsResponse { - model_list?: Array; -} - -export interface EvaluationListParams { - /** - * Maximum number of results to return (max 100) - */ - limit?: number; - - /** - * Filter by job status - */ - status?: 'pending' | 'queued' | 'running' | 'completed' | 'error' | 'user_error'; -} - -export declare namespace Evaluations { - export { - type EvaluationListResponse as EvaluationListResponse, - type EvaluationGetAllowedModelsResponse as EvaluationGetAllowedModelsResponse, - type EvaluationListParams as EvaluationListParams, - }; -} diff --git a/src/resources/index.ts b/src/resources/index.ts index 92cca772..6374b824 100644 --- a/src/resources/index.ts +++ b/src/resources/index.ts @@ -45,18 +45,15 @@ export { type EndpointListParams, } from './endpoints'; export { - Evaluation, + Evals, type EvaluationJudgeModelConfig, type EvaluationModelRequest, - type EvaluationRetrieveResponse, - type EvaluationGetStatusResponse, -} from './evaluation'; -export { - Evaluations, - type EvaluationListResponse, - type EvaluationGetAllowedModelsResponse, - type EvaluationListParams, -} from './evaluations'; + type EvalRetrieveResponse, + type EvalListResponse, + type EvalGetAllowedModelsResponse, + type EvalGetStatusResponse, + type EvalListParams, +} from './evals'; export { Files, type FileObject, diff --git a/tests/api-resources/evaluations.test.ts b/tests/api-resources/evals.test.ts similarity index 53% rename from tests/api-resources/evaluations.test.ts rename to tests/api-resources/evals.test.ts index b675978e..45c9be34 100644 --- a/tests/api-resources/evaluations.test.ts +++ b/tests/api-resources/evals.test.ts @@ -7,9 +7,20 @@ const client = new Together({ baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010', }); -describe('resource evaluations', () => { +describe('resource evals', () => { + test('retrieve', async () => { + const responsePromise = client.evals.retrieve('id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + test('list', async () => { - const responsePromise = client.evaluations.list(); + const responsePromise = client.evals.list(); const rawResponse = await responsePromise.asResponse(); expect(rawResponse).toBeInstanceOf(Response); const response = await responsePromise; @@ -22,12 +33,23 @@ describe('resource evaluations', () => { test('list: request options and params are passed correctly', async () => { // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error await expect( - client.evaluations.list({ limit: 1, status: 'pending' }, { path: '/_stainless_unknown_path' }), + client.evals.list({ limit: 1, status: 'pending' }, { path: '/_stainless_unknown_path' }), ).rejects.toThrow(Together.NotFoundError); }); test('getAllowedModels', async () => { - const responsePromise = client.evaluations.getAllowedModels(); + const responsePromise = client.evals.getAllowedModels(); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('getStatus', async () => { + const responsePromise = client.evals.getStatus('id'); const rawResponse = await responsePromise.asResponse(); expect(rawResponse).toBeInstanceOf(Response); const response = await responsePromise; diff --git a/tests/api-resources/evaluation.test.ts b/tests/api-resources/evaluation.test.ts deleted file mode 100644 index 659e88fd..00000000 --- a/tests/api-resources/evaluation.test.ts +++ /dev/null @@ -1,32 +0,0 @@ -// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -import Together from 'together-ai'; - -const client = new Together({ - apiKey: 'My API Key', - baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010', -}); - -describe('resource evaluation', () => { - test('retrieve', async () => { - const responsePromise = client.evaluation.retrieve('id'); - const rawResponse = await responsePromise.asResponse(); - expect(rawResponse).toBeInstanceOf(Response); - const response = await responsePromise; - expect(response).not.toBeInstanceOf(Response); - const dataAndResponse = await responsePromise.withResponse(); - expect(dataAndResponse.data).toBe(response); - expect(dataAndResponse.response).toBe(rawResponse); - }); - - test('getStatus', async () => { - const responsePromise = client.evaluation.getStatus('id'); - const rawResponse = await responsePromise.asResponse(); - expect(rawResponse).toBeInstanceOf(Response); - const response = await responsePromise; - expect(response).not.toBeInstanceOf(Response); - const dataAndResponse = await responsePromise.withResponse(); - expect(dataAndResponse.data).toBe(response); - expect(dataAndResponse.response).toBe(rawResponse); - }); -}); From f56eabfcd46eb826339afa0a13d74023025f4e85 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Tue, 21 Oct 2025 23:37:26 +0000 Subject: [PATCH 2/2] release: 0.27.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 8 ++++++++ package.json | 2 +- src/version.ts | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index c84411f5..2b114496 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.26.0" + ".": "0.27.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index a3abac3d..51fbc58e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 0.27.0 (2025-10-21) + +Full Changelog: [v0.26.0...v0.27.0](https://github.com/togethercomputer/together-typescript/compare/v0.26.0...v0.27.0) + +### Features + +* **api:** Rename evaluation sdks to evals ([4c3b732](https://github.com/togethercomputer/together-typescript/commit/4c3b732d55c852ba6b45cbed189dc729e660d6e2)) + ## 0.26.0 (2025-10-21) Full Changelog: [v0.25.0...v0.26.0](https://github.com/togethercomputer/together-typescript/compare/v0.25.0...v0.26.0) diff --git a/package.json b/package.json index 4a6c1ae5..c7311a89 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "together-ai", - "version": "0.26.0", + "version": "0.27.0", "description": "The official TypeScript library for the Together API", "author": "Together ", "types": "dist/index.d.ts", diff --git a/src/version.ts b/src/version.ts index aa25151e..288031cf 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '0.26.0'; // x-release-please-version +export const VERSION = '0.27.0'; // x-release-please-version