From 6bf136426625aa96535c8048af78f5b1596a4994 Mon Sep 17 00:00:00 2001 From: Laure-di Date: Wed, 12 Mar 2025 11:04:00 +0100 Subject: [PATCH] feat(inference): add support v1 --- .../clients/src/api/inference/v1/api.gen.ts | 372 ++++++++++++++ .../src/api/inference/v1/content.gen.ts | 16 + .../clients/src/api/inference/v1/index.gen.ts | 41 ++ .../src/api/inference/v1/marshalling.gen.ts | 370 ++++++++++++++ .../clients/src/api/inference/v1/types.gen.ts | 455 ++++++++++++++++++ .../api/inference/v1/validation-rules.gen.ts | 65 +++ 6 files changed, 1319 insertions(+) create mode 100644 packages/clients/src/api/inference/v1/api.gen.ts create mode 100644 packages/clients/src/api/inference/v1/content.gen.ts create mode 100644 packages/clients/src/api/inference/v1/index.gen.ts create mode 100644 packages/clients/src/api/inference/v1/marshalling.gen.ts create mode 100644 packages/clients/src/api/inference/v1/types.gen.ts create mode 100644 packages/clients/src/api/inference/v1/validation-rules.gen.ts diff --git a/packages/clients/src/api/inference/v1/api.gen.ts b/packages/clients/src/api/inference/v1/api.gen.ts new file mode 100644 index 000000000..1fad28969 --- /dev/null +++ b/packages/clients/src/api/inference/v1/api.gen.ts @@ -0,0 +1,372 @@ +// This file was automatically generated. DO NOT EDIT. +// If you have any remark or suggestion do not hesitate to open an issue. 
+import { + API as ParentAPI, + enrichForPagination, + urlParams, + validatePathParam, + waitForResource, +} from '../../../bridge' +import type { Region as ScwRegion, WaitForOptions } from '../../../bridge' +import { + DEPLOYMENT_TRANSIENT_STATUSES, + MODEL_TRANSIENT_STATUSES, +} from './content.gen' +import { + marshalCreateDeploymentRequest, + marshalCreateEndpointRequest, + marshalCreateModelRequest, + marshalUpdateDeploymentRequest, + marshalUpdateEndpointRequest, + unmarshalDeployment, + unmarshalEndpoint, + unmarshalListDeploymentsResponse, + unmarshalListModelsResponse, + unmarshalListNodeTypesResponse, + unmarshalModel, +} from './marshalling.gen' +import type { + CreateDeploymentRequest, + CreateEndpointRequest, + CreateModelRequest, + DeleteDeploymentRequest, + DeleteEndpointRequest, + DeleteModelRequest, + Deployment, + Endpoint, + GetDeploymentCertificateRequest, + GetDeploymentRequest, + GetModelRequest, + ListDeploymentsRequest, + ListDeploymentsResponse, + ListModelsRequest, + ListModelsResponse, + ListNodeTypesRequest, + ListNodeTypesResponse, + Model, + UpdateDeploymentRequest, + UpdateEndpointRequest, +} from './types.gen' + +/** Headers attached to every request below that sends a JSON body (the POST and PATCH calls). */ const jsonContentHeaders = { + 'Content-Type': 'application/json; charset=utf-8', +} + +/** + * Managed Inference API. + * + * This API allows you to handle your Managed Inference services. + */ +export class API extends ParentAPI { + /** Lists the available regions of the API. */ + public static readonly LOCALITIES: ScwRegion[] = ['fr-par'] + + /** Fetches one page of deployments. `listDeployments` hands this function to `enrichForPagination`. The region falls back to the client default region and the page size to the client default page size. */ protected pageOfListDeployments = ( + request: Readonly = {}, + ) => + this.client.fetch( + { + method: 'GET', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/deployments`, + urlParams: urlParams( + ['name', request.name], + ['order_by', request.orderBy], + ['organization_id', request.organizationId], + ['page', request.page], + [ + 'page_size', + request.pageSize ?? 
this.client.settings.defaultPageSize, + ], + ['project_id', request.projectId], + ['tags', request.tags], + ), + }, + unmarshalListDeploymentsResponse, + ) + + /** + * List inference deployments. List all your inference deployments. + * + * @param request - The request {@link ListDeploymentsRequest} + * @returns A Promise of ListDeploymentsResponse + */ + listDeployments = (request: Readonly = {}) => + enrichForPagination('deployments', this.pageOfListDeployments, request) + + /** + * Get a deployment. Get the deployment for the given ID. + * + * @param request - The request {@link GetDeploymentRequest} + * @returns A Promise of Deployment + */ + getDeployment = (request: Readonly) => + this.client.fetch( + { + method: 'GET', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/deployments/${validatePathParam('deploymentId', request.deploymentId)}`, + }, + unmarshalDeployment, + ) + + /** + * Waits for {@link Deployment} to be in a final state. Unless `options.stop` is provided, the stop predicate handed to `waitForResource` resolves to true once the status returned by `getDeployment` is no longer listed in `DEPLOYMENT_TRANSIENT_STATUSES`. + * + * @param request - The request {@link GetDeploymentRequest} + * @param options - The waiting options + * @returns A Promise of Deployment + */ + waitForDeployment = ( + request: Readonly, + options?: Readonly>, + ) => + waitForResource( + options?.stop ?? + (res => + Promise.resolve(!DEPLOYMENT_TRANSIENT_STATUSES.includes(res.status))), + this.getDeployment, + request, + options, + ) + + /** + * Create a deployment. Create a new inference deployment related to a + * specific model. + * + * @param request - The request {@link CreateDeploymentRequest} + * @returns A Promise of Deployment + */ + createDeployment = (request: Readonly) => + this.client.fetch( + { + body: JSON.stringify( + marshalCreateDeploymentRequest(request, this.client.settings), + ), + headers: jsonContentHeaders, + method: 'POST', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? 
this.client.settings.defaultRegion)}/deployments`, + }, + unmarshalDeployment, + ) + + /** + * Update a deployment. Update an existing inference deployment. + * + * @param request - The request {@link UpdateDeploymentRequest} + * @returns A Promise of Deployment + */ + updateDeployment = (request: Readonly) => + this.client.fetch( + { + body: JSON.stringify( + marshalUpdateDeploymentRequest(request, this.client.settings), + ), + headers: jsonContentHeaders, + method: 'PATCH', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/deployments/${validatePathParam('deploymentId', request.deploymentId)}`, + }, + unmarshalDeployment, + ) + + /** + * Delete a deployment. Delete an existing inference deployment. + * + * @param request - The request {@link DeleteDeploymentRequest} + * @returns A Promise of Deployment + */ + deleteDeployment = (request: Readonly) => + this.client.fetch( + { + method: 'DELETE', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/deployments/${validatePathParam('deploymentId', request.deploymentId)}`, + }, + unmarshalDeployment, + ) + + /** + * Get the CA certificate. Get the CA certificate used for the deployment of + * private endpoints. The CA certificate will be returned as a PEM file. NOTE(review): the request always sends the `dl=1` query parameter and asks for a `blob` response type; the meaning of `dl` is not visible in this file, confirm against the API reference. + * + * @param request - The request {@link GetDeploymentCertificateRequest} + * @returns A Promise of Blob + */ + getDeploymentCertificate = ( + request: Readonly, + ) => + this.client.fetch({ + method: 'GET', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/deployments/${validatePathParam('deploymentId', request.deploymentId)}/certificate`, + urlParams: urlParams(['dl', 1]), + responseType: 'blob', + }) + + /** + * Create an endpoint. Create a new Endpoint related to a specific deployment. 
+ * + * @param request - The request {@link CreateEndpointRequest} + * @returns A Promise of Endpoint + */ + createEndpoint = (request: Readonly) => + this.client.fetch( + { + body: JSON.stringify( + marshalCreateEndpointRequest(request, this.client.settings), + ), + headers: jsonContentHeaders, + method: 'POST', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/endpoints`, + }, + unmarshalEndpoint, + ) + + /** + * Update an endpoint. Update an existing Endpoint. + * + * @param request - The request {@link UpdateEndpointRequest} + * @returns A Promise of Endpoint + */ + updateEndpoint = (request: Readonly) => + this.client.fetch( + { + body: JSON.stringify( + marshalUpdateEndpointRequest(request, this.client.settings), + ), + headers: jsonContentHeaders, + method: 'PATCH', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/endpoints/${validatePathParam('endpointId', request.endpointId)}`, + }, + unmarshalEndpoint, + ) + + /** + * Delete an endpoint. Delete an existing Endpoint. + * + * @param request - The request {@link DeleteEndpointRequest} + */ + deleteEndpoint = (request: Readonly) => + this.client.fetch({ + method: 'DELETE', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/endpoints/${validatePathParam('endpointId', request.endpointId)}`, + }) + + /** Fetches one page of models. `listModels` hands this function to `enrichForPagination`. Unlike the deployments listing, no `organization_id` filter is sent. */ protected pageOfListModels = (request: Readonly = {}) => + this.client.fetch( + { + method: 'GET', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/models`, + urlParams: urlParams( + ['name', request.name], + ['order_by', request.orderBy], + ['page', request.page], + [ + 'page_size', + request.pageSize ?? 
this.client.settings.defaultPageSize, + ], + ['project_id', request.projectId], + ['tags', request.tags], + ), + }, + unmarshalListModelsResponse, + ) + + /** + * List models. List all available models. + * + * @param request - The request {@link ListModelsRequest} + * @returns A Promise of ListModelsResponse + */ + listModels = (request: Readonly = {}) => + enrichForPagination('models', this.pageOfListModels, request) + + /** + * Get a model. Get the model for the given ID. + * + * @param request - The request {@link GetModelRequest} + * @returns A Promise of Model + */ + getModel = (request: Readonly) => + this.client.fetch( + { + method: 'GET', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/models/${validatePathParam('modelId', request.modelId)}`, + }, + unmarshalModel, + ) + + /** + * Waits for {@link Model} to be in a final state. Unless `options.stop` is provided, the stop predicate handed to `waitForResource` resolves to true once the status returned by `getModel` is no longer listed in `MODEL_TRANSIENT_STATUSES`. + * + * @param request - The request {@link GetModelRequest} + * @param options - The waiting options + * @returns A Promise of Model + */ + waitForModel = ( + request: Readonly, + options?: Readonly>, + ) => + waitForResource( + options?.stop ?? + (res => + Promise.resolve(!MODEL_TRANSIENT_STATUSES.includes(res.status))), + this.getModel, + request, + options, + ) + + /** + * Import a model. Import a new model to your model library. + * + * @param request - The request {@link CreateModelRequest} + * @returns A Promise of Model + */ + createModel = (request: Readonly) => + this.client.fetch( + { + body: JSON.stringify( + marshalCreateModelRequest(request, this.client.settings), + ), + headers: jsonContentHeaders, + method: 'POST', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/models`, + }, + unmarshalModel, + ) + + /** + * Delete a model. Delete an existing model from your model library. 
+ * + * @param request - The request {@link DeleteModelRequest} + */ + deleteModel = (request: Readonly) => + this.client.fetch({ + method: 'DELETE', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/models/${validatePathParam('modelId', request.modelId)}`, + }) + + /** Fetches one page of node types. `listNodeTypes` hands this function to `enrichForPagination`. Only `include_disabled_types`, `page` and `page_size` are sent as URL parameters. */ protected pageOfListNodeTypes = (request: Readonly) => + this.client.fetch( + { + method: 'GET', + path: `/inference/v1/regions/${validatePathParam('region', request.region ?? this.client.settings.defaultRegion)}/node-types`, + urlParams: urlParams( + ['include_disabled_types', request.includeDisabledTypes], + ['page', request.page], + [ + 'page_size', + request.pageSize ?? this.client.settings.defaultPageSize, + ], + ), + }, + unmarshalListNodeTypesResponse, + ) + + /** + * List available node types. List all available node types. By default, the + * node types returned in the list are ordered by creation date in ascending + * order, though this can be modified via the `order_by` field. NOTE(review): {@link ListNodeTypesRequest} declares no `orderBy` field and `pageOfListNodeTypes` sends no `order_by` URL parameter, so ordering cannot be changed through this client as written; confirm against the API reference. + * + * @param request - The request {@link ListNodeTypesRequest} + * @returns A Promise of ListNodeTypesResponse + */ + listNodeTypes = (request: Readonly) => + enrichForPagination('nodeTypes', this.pageOfListNodeTypes, request) +} diff --git a/packages/clients/src/api/inference/v1/content.gen.ts b/packages/clients/src/api/inference/v1/content.gen.ts new file mode 100644 index 000000000..9c288901a --- /dev/null +++ b/packages/clients/src/api/inference/v1/content.gen.ts @@ -0,0 +1,16 @@ +// This file was automatically generated. DO NOT EDIT. +// If you have any remark or suggestion do not hesitate to open an issue. +import type { DeploymentStatus, ModelStatus } from './types.gen' + +/** Lists transient statuses of the enum {@link DeploymentStatus}. */ +export const DEPLOYMENT_TRANSIENT_STATUSES: DeploymentStatus[] = [ + 'creating', + 'deploying', + 'deleting', +] + +/** Lists transient statuses of the enum {@link ModelStatus}. 
*/ +export const MODEL_TRANSIENT_STATUSES: ModelStatus[] = [ + 'preparing', + 'downloading', +] diff --git a/packages/clients/src/api/inference/v1/index.gen.ts b/packages/clients/src/api/inference/v1/index.gen.ts new file mode 100644 index 000000000..a875e5e31 --- /dev/null +++ b/packages/clients/src/api/inference/v1/index.gen.ts @@ -0,0 +1,41 @@ +// This file was automatically generated. DO NOT EDIT. +// If you have any remark or suggestion do not hesitate to open an issue. +export { API } from './api.gen' +export * from './content.gen' +export type { + CreateDeploymentRequest, + CreateEndpointRequest, + CreateModelRequest, + DeleteDeploymentRequest, + DeleteEndpointRequest, + DeleteModelRequest, + Deployment, + DeploymentQuantization, + DeploymentStatus, + Endpoint, + EndpointPrivateNetworkDetails, + EndpointPublicNetworkDetails, + EndpointSpec, + GetDeploymentCertificateRequest, + GetDeploymentRequest, + GetModelRequest, + ListDeploymentsRequest, + ListDeploymentsRequestOrderBy, + ListDeploymentsResponse, + ListModelsRequest, + ListModelsRequestOrderBy, + ListModelsResponse, + ListNodeTypesRequest, + ListNodeTypesResponse, + Model, + ModelSource, + ModelStatus, + ModelSupportInfo, + ModelSupportedNode, + ModelSupportedQuantization, + NodeType, + NodeTypeStock, + UpdateDeploymentRequest, + UpdateEndpointRequest, +} from './types.gen' +export * as ValidationRules from './validation-rules.gen' diff --git a/packages/clients/src/api/inference/v1/marshalling.gen.ts b/packages/clients/src/api/inference/v1/marshalling.gen.ts new file mode 100644 index 000000000..e32a6cc25 --- /dev/null +++ b/packages/clients/src/api/inference/v1/marshalling.gen.ts @@ -0,0 +1,370 @@ +// This file was automatically generated. DO NOT EDIT. +// If you have any remark or suggestion do not hesitate to open an issue. 
+import randomName from '@scaleway/random-name' +import { + isJSONObject, + resolveOneOf, + unmarshalArrayOfObject, + unmarshalDate, +} from '../../../bridge' +import type { DefaultValues } from '../../../bridge' +import type { + CreateDeploymentRequest, + CreateEndpointRequest, + CreateModelRequest, + Deployment, + DeploymentQuantization, + Endpoint, + EndpointPrivateNetworkDetails, + EndpointPublicNetworkDetails, + EndpointSpec, + ListDeploymentsResponse, + ListModelsResponse, + ListNodeTypesResponse, + Model, + ModelSource, + ModelSupportInfo, + ModelSupportedNode, + ModelSupportedQuantization, + NodeType, + UpdateDeploymentRequest, + UpdateEndpointRequest, +} from './types.gen' + +/** Converts a snake_case API payload into EndpointPrivateNetworkDetails. Throws a TypeError when `isJSONObject(data)` is false. Every `unmarshal*` function in this file follows the same guard-then-map shape. */ const unmarshalEndpointPrivateNetworkDetails = ( + data: unknown, +): EndpointPrivateNetworkDetails => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'EndpointPrivateNetworkDetails' failed as data isn't a dictionary.`, + ) + } + + return { + privateNetworkId: data.private_network_id, + } as EndpointPrivateNetworkDetails +} + +/** Checks that `data` is a JSON object and returns an empty object: no field of the payload is read. */ const unmarshalEndpointPublicNetworkDetails = ( + data: unknown, +): EndpointPublicNetworkDetails => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'EndpointPublicNetworkDetails' failed as data isn't a dictionary.`, + ) + } + + return {} as EndpointPublicNetworkDetails +} + +/** Builds an Endpoint from a snake_case API payload. `private_network` and `public_network` are unmarshalled only when truthy in the payload and are otherwise left undefined. */ export const unmarshalEndpoint = (data: unknown): Endpoint => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'Endpoint' failed as data isn't a dictionary.`, + ) + } + + return { + disableAuth: data.disable_auth, + id: data.id, + privateNetwork: data.private_network + ? unmarshalEndpointPrivateNetworkDetails(data.private_network) + : undefined, + publicNetwork: data.public_network + ? 
unmarshalEndpointPublicNetworkDetails(data.public_network) + : undefined, + url: data.url, + } as Endpoint +} + +const unmarshalModelSupportedQuantization = ( + data: unknown, +): ModelSupportedQuantization => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'ModelSupportedQuantization' failed as data isn't a dictionary.`, + ) + } + + return { + allowed: data.allowed, + maxContextSize: data.max_context_size, + quantizationBits: data.quantization_bits, + } as ModelSupportedQuantization +} + +const unmarshalModelSupportedNode = (data: unknown): ModelSupportedNode => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'ModelSupportedNode' failed as data isn't a dictionary.`, + ) + } + + return { + nodeTypeName: data.node_type_name, + quantizations: unmarshalArrayOfObject( + data.quantizations, + unmarshalModelSupportedQuantization, + ), + } as ModelSupportedNode +} + +export const unmarshalModelSupportInfo = (data: unknown): ModelSupportInfo => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'ModelSupportInfo' failed as data isn't a dictionary.`, + ) + } + + return { + nodes: unmarshalArrayOfObject(data.nodes, unmarshalModelSupportedNode), + } as ModelSupportInfo +} + +const unmarshalDeploymentQuantization = ( + data: unknown, +): DeploymentQuantization => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'DeploymentQuantization' failed as data isn't a dictionary.`, + ) + } + + return { + bits: data.bits, + enabled: data.enabled, + } as DeploymentQuantization +} + +/** Builds a Deployment from a snake_case API payload. Dates go through `unmarshalDate`, `endpoints` through `unmarshalArrayOfObject` with `unmarshalEndpoint`, and `quantization` is unmarshalled only when truthy in the payload. All other fields are copied as-is. */ export const unmarshalDeployment = (data: unknown): Deployment => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'Deployment' failed as data isn't a dictionary.`, + ) + } + + return { + createdAt: unmarshalDate(data.created_at), + endpoints: unmarshalArrayOfObject(data.endpoints, unmarshalEndpoint), + errorMessage: data.error_message, + id: data.id, 
+ maxSize: data.max_size, + minSize: data.min_size, + modelId: data.model_id, + modelName: data.model_name, + name: data.name, + nodeTypeName: data.node_type_name, + projectId: data.project_id, + quantization: data.quantization + ? unmarshalDeploymentQuantization(data.quantization) + : undefined, + region: data.region, + size: data.size, + status: data.status, + tags: data.tags, + updatedAt: unmarshalDate(data.updated_at), + } as Deployment +} + +export const unmarshalModel = (data: unknown): Model => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'Model' failed as data isn't a dictionary.`, + ) + } + + return { + createdAt: unmarshalDate(data.created_at), + description: data.description, + errorMessage: data.error_message, + hasEula: data.has_eula, + id: data.id, + name: data.name, + nodesSupport: unmarshalArrayOfObject( + data.nodes_support, + unmarshalModelSupportInfo, + ), + parameterSizeBits: data.parameter_size_bits, + projectId: data.project_id, + region: data.region, + sizeBytes: data.size_bytes, + status: data.status, + tags: data.tags, + updatedAt: unmarshalDate(data.updated_at), + } as Model +} + +export const unmarshalListDeploymentsResponse = ( + data: unknown, +): ListDeploymentsResponse => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'ListDeploymentsResponse' failed as data isn't a dictionary.`, + ) + } + + return { + deployments: unmarshalArrayOfObject(data.deployments, unmarshalDeployment), + totalCount: data.total_count, + } as ListDeploymentsResponse +} + +export const unmarshalListModelsResponse = ( + data: unknown, +): ListModelsResponse => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'ListModelsResponse' failed as data isn't a dictionary.`, + ) + } + + return { + models: unmarshalArrayOfObject(data.models, unmarshalModel), + totalCount: data.total_count, + } as ListModelsResponse +} + +const unmarshalNodeType = (data: unknown): NodeType => { + 
+ if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'NodeType' failed as data isn't a dictionary.`, + ) + } + + return { + beta: data.beta, + createdAt: unmarshalDate(data.created_at), + description: data.description, + disabled: data.disabled, + gpus: data.gpus, + memory: data.memory, + name: data.name, + region: data.region, + stockStatus: data.stock_status, + updatedAt: unmarshalDate(data.updated_at), + vcpus: data.vcpus, + vram: data.vram, + } as NodeType +} + +export const unmarshalListNodeTypesResponse = ( + data: unknown, +): ListNodeTypesResponse => { + if (!isJSONObject(data)) { + throw new TypeError( + `Unmarshalling the type 'ListNodeTypesResponse' failed as data isn't a dictionary.`, + ) + } + + return { + nodeTypes: unmarshalArrayOfObject(data.node_types, unmarshalNodeType), + totalCount: data.total_count, + } as ListNodeTypesResponse +} + +const marshalEndpointPrivateNetworkDetails = ( + request: EndpointPrivateNetworkDetails, + defaults: DefaultValues, +): Record => ({ + private_network_id: request.privateNetworkId, +}) + +const marshalEndpointPublicNetworkDetails = ( + request: EndpointPublicNetworkDetails, + defaults: DefaultValues, +): Record => ({}) + +const marshalDeploymentQuantization = ( + request: DeploymentQuantization, + defaults: DefaultValues, +): Record => ({ + bits: request.bits, + enabled: request.enabled, +}) + +/** Serializes an EndpointSpec to its snake_case payload. Both network variants are handed to `resolveOneOf` as candidates, each being undefined when not set on the request. NOTE(review): what happens when both are set depends on `resolveOneOf`, which is defined in the bridge module and not visible here. */ const marshalEndpointSpec = ( + request: EndpointSpec, + defaults: DefaultValues, +): Record => ({ + disable_auth: request.disableAuth, + ...resolveOneOf([ + { + param: 'public_network', + value: + request.publicNetwork !== undefined + ? marshalEndpointPublicNetworkDetails(request.publicNetwork, defaults) + : undefined, + }, + { + param: 'private_network', + value: + request.privateNetwork !== undefined + ? 
marshalEndpointPrivateNetworkDetails( + request.privateNetwork, + defaults, + ) + : undefined, + }, + ]), +}) + +/** Serializes a CreateDeploymentRequest. `name` falls back to `randomName('inference')` when it is empty or undefined, and `project_id` falls back to `defaults.defaultProjectId` when it is null or undefined. `region` is not part of the body. */ export const marshalCreateDeploymentRequest = ( + request: CreateDeploymentRequest, + defaults: DefaultValues, +): Record => ({ + accept_eula: request.acceptEula, + endpoints: request.endpoints.map(elt => marshalEndpointSpec(elt, defaults)), + max_size: request.maxSize, + min_size: request.minSize, + model_id: request.modelId, + name: request.name || randomName('inference'), + node_type_name: request.nodeTypeName, + project_id: request.projectId ?? defaults.defaultProjectId, + quantization: + request.quantization !== undefined + ? marshalDeploymentQuantization(request.quantization, defaults) + : undefined, + tags: request.tags, +}) + +export const marshalCreateEndpointRequest = ( + request: CreateEndpointRequest, + defaults: DefaultValues, +): Record => ({ + deployment_id: request.deploymentId, + endpoint: marshalEndpointSpec(request.endpoint, defaults), +}) + +const marshalModelSource = ( + request: ModelSource, + defaults: DefaultValues, +): Record => ({ + url: request.url, + ...resolveOneOf([{ param: 'secret', value: request.secret }]), +}) + +/** Serializes a CreateModelRequest. `name` falls back to `randomName('model')` when it is empty or undefined, and `project_id` falls back to `defaults.defaultProjectId` when it is null or undefined. */ export const marshalCreateModelRequest = ( + request: CreateModelRequest, + defaults: DefaultValues, +): Record => ({ + name: request.name || randomName('model'), + project_id: request.projectId ?? 
defaults.defaultProjectId, + source: marshalModelSource(request.source, defaults), +}) + +export const marshalUpdateDeploymentRequest = ( + request: UpdateDeploymentRequest, + defaults: DefaultValues, +): Record => ({ + max_size: request.maxSize, + min_size: request.minSize, + name: request.name, + tags: request.tags, +}) + +export const marshalUpdateEndpointRequest = ( + request: UpdateEndpointRequest, + defaults: DefaultValues, +): Record => ({ + disable_auth: request.disableAuth, +}) diff --git a/packages/clients/src/api/inference/v1/types.gen.ts b/packages/clients/src/api/inference/v1/types.gen.ts new file mode 100644 index 000000000..eca976491 --- /dev/null +++ b/packages/clients/src/api/inference/v1/types.gen.ts @@ -0,0 +1,455 @@ +// This file was automatically generated. DO NOT EDIT. +// If you have any remark or suggestion do not hesitate to open an issue. +import type { Region as ScwRegion } from '../../../bridge' + +export type DeploymentStatus = + | 'unknown_status' + | 'creating' + | 'deploying' + | 'ready' + | 'error' + | 'deleting' + | 'locked' + +export type ListDeploymentsRequestOrderBy = + | 'created_at_desc' + | 'created_at_asc' + | 'name_asc' + | 'name_desc' + +export type ListModelsRequestOrderBy = + | 'display_rank_asc' + | 'created_at_asc' + | 'created_at_desc' + | 'name_asc' + | 'name_desc' + +export type ModelStatus = + | 'unknown_status' + | 'preparing' + | 'downloading' + | 'ready' + | 'error' + +export type NodeTypeStock = + | 'unknown_stock' + | 'low_stock' + | 'out_of_stock' + | 'available' + +export interface ModelSupportedQuantization { + /** Number of bits for this supported quantization. */ + quantizationBits: number + /** Tells whether this quantization is allowed for this node type. */ + allowed: boolean + /** + * Maximum inference context size available for this node type and + * quantization. 
+ */ + maxContextSize: number +} + +export interface EndpointPrivateNetworkDetails { + /** ID of the Private Network; marshalled as `private_network_id`. */ privateNetworkId: string +} + +export interface EndpointPublicNetworkDetails {} + +export interface ModelSupportedNode { + /** Supported node type. */ + nodeTypeName: string + /** Supported quantizations. */ + quantizations: ModelSupportedQuantization[] +} + +export interface DeploymentQuantization { + /** Whether to enable quantization for this deployment. */ + enabled: boolean + /** + * The number of bits each model parameter should be quantized to. The + * quantization method is chosen based on this value. + */ + bits: number +} + +export interface Endpoint { + /** Unique identifier. */ + id: string + /** + * For private endpoints, the URL will be accessible only from the Private + * Network. In addition, private endpoints will expose a CA certificate that + * can be used to verify the server's identity. This CA certificate can be + * retrieved using the `GetDeploymentCertificate` API call. + */ + url: string + /** + * Defines whether the endpoint is public. + * + * One-of ('details'): at most one of 'publicNetwork', 'privateNetwork' could + * be set. + */ + publicNetwork?: EndpointPublicNetworkDetails + /** + * Details of the Private Network. + * + * One-of ('details'): at most one of 'publicNetwork', 'privateNetwork' could + * be set. + */ + privateNetwork?: EndpointPrivateNetworkDetails + /** Defines whether the authentication is disabled. */ + disableAuth: boolean +} + +export interface ModelSupportInfo { + /** List of supported node types. */ + nodes: ModelSupportedNode[] +} + +export interface EndpointSpec { + /** + * Set the endpoint as public. + * + * One-of ('details'): at most one of 'publicNetwork', 'privateNetwork' could + * be set. + */ + publicNetwork?: EndpointPublicNetworkDetails + /** + * Private endpoints are only accessible from the Private Network. + * + * One-of ('details'): at most one of 'publicNetwork', 'privateNetwork' could + * be set. 
+ */ + privateNetwork?: EndpointPrivateNetworkDetails + /** + * By default, deployments are protected by IAM authentication. When setting + * this field to true, the authentication will be disabled. + */ + disableAuth: boolean +} + +export interface ModelSource { + /** Location the model is imported from; marshalled as `url`. */ url: string + /** One-of ('credentials'): at most one of 'secret' could be set. */ + secret?: string +} + +export interface Deployment { + /** Unique identifier. */ + id: string + /** Name of the deployment. */ + name: string + /** Project ID. */ + projectId: string + /** Status of the deployment. */ + status: DeploymentStatus + /** List of tags applied to the deployment. */ + tags: string[] + /** Node type of the deployment. */ + nodeTypeName: string + /** List of endpoints. */ + endpoints: Endpoint[] + /** Current size of the pool. */ + size: number + /** Defines the minimum size of the pool. */ + minSize: number + /** Defines the maximum size of the pool. */ + maxSize: number + /** Displays information if your deployment is in error state. */ + errorMessage?: string + /** ID of the model used for the deployment. */ + modelId: string + /** Quantization parameters for this deployment. */ + quantization?: DeploymentQuantization + /** Name of the deployed model. */ + modelName: string + /** Creation date of the deployment. */ + createdAt?: Date + /** Last modification date of the deployment. */ + updatedAt?: Date + /** Region of the deployment. */ + region: ScwRegion +} + +export interface Model { + /** Unique identifier. */ + id: string + /** Unique Name identifier. */ + name: string + /** Project ID. */ + projectId: string + /** List of tags applied to the model. */ + tags: string[] + /** Status of the model. */ + status: ModelStatus + /** Purpose of the model. */ + description: string + /** Displays information if your model is in error state. */ + errorMessage?: string + /** Defines whether the model has an end user license agreement. */ + hasEula: boolean + /** Creation date of the model. 
*/ + createdAt?: Date + /** Last modification date of the model. */ + updatedAt?: Date + /** Region of the model. */ + region: ScwRegion + /** Supported nodes types with quantization options and context lengths. */ + nodesSupport: ModelSupportInfo[] + /** Size, in bits, of the model parameters. */ + parameterSizeBits: number + /** Total size, in bytes, of the model files. */ + sizeBytes: number +} + +export interface NodeType { + /** Name of the node type. */ + name: string + /** Current stock status for the node type. */ + stockStatus: NodeTypeStock + /** Current specs of the offer. */ + description: string + /** Number of virtual CPUs. */ + vcpus: number + /** Quantity of RAM. */ + memory: number + /** Quantity of GPU RAM. */ + vram: number + /** The node type is currently disabled. */ + disabled: boolean + /** The node type is currently in beta. */ + beta: boolean + /** Creation date of the node type. */ + createdAt?: Date + /** Last modification date of the node type. */ + updatedAt?: Date + /** Number of GPUs. */ + gpus: number + /** Region of the node type. */ + region: ScwRegion +} + +export interface CreateDeploymentRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** Name of the deployment. */ + name?: string + /** ID of the Project to create the deployment in. */ + projectId?: string + /** ID of the model to use. */ + modelId: string + /** + * If the model has an EULA, you must accept it before proceeding. The terms + * of the EULA can be retrieved using the `GetModelEula` API call. NOTE(review): the v1 `API` class added by this patch exposes no `getModelEula` method; confirm where the EULA terms can be retrieved. + */ + acceptEula?: boolean + /** Name of the node type to use. */ + nodeTypeName: string + /** List of tags to apply to the deployment. */ + tags?: string[] + /** Defines the minimum size of the pool. */ + minSize?: number + /** Defines the maximum size of the pool. */ + maxSize?: number + /** List of endpoints to create. 
*/ + endpoints: EndpointSpec[] + /** Quantization settings to apply to this deployment. */ + quantization?: DeploymentQuantization +} + +export interface CreateEndpointRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the deployment to create the endpoint for. */ + deploymentId: string + /** Specification of the endpoint. */ + endpoint: EndpointSpec +} + +export interface CreateModelRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** Name of the model. */ + name?: string + /** ID of the Project to import the model in. */ + projectId?: string + /** Where to import the model from. */ + source: ModelSource +} + +export interface DeleteDeploymentRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the deployment to delete. */ + deploymentId: string +} + +export interface DeleteEndpointRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the endpoint to delete. */ + endpointId: string +} + +export interface DeleteModelRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the model to delete. */ + modelId: string +} + +export interface GetDeploymentCertificateRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the deployment whose CA certificate is requested. */ deploymentId: string +} + +export interface GetDeploymentRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the deployment to get. */ + deploymentId: string +} + +export interface GetModelRequest { + /** + * Region to target. 
If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the model to get. */ + modelId: string +} + +export interface ListDeploymentsRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** Page number to return. */ + page?: number + /** Maximum number of deployments to return per page. */ + pageSize?: number + /** Order in which to return results. */ + orderBy?: ListDeploymentsRequestOrderBy + /** Filter by Project ID. */ + projectId?: string + /** Filter by Organization ID. */ + organizationId?: string + /** Filter by deployment name. */ + name?: string + /** Filter by tags. */ + tags?: string[] +} + +export interface ListDeploymentsResponse { + /** List of deployments on the current page. */ + deployments: Deployment[] + /** Total number of deployments. */ + totalCount: number +} + +export interface ListModelsRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** Order in which to return results. */ + orderBy?: ListModelsRequestOrderBy + /** Page number to return. */ + page?: number + /** Maximum number of models to return per page. */ + pageSize?: number + /** Filter by Project ID. */ + projectId?: string + /** Filter by model name. */ + name?: string + /** Filter by tags. */ + tags?: string[] +} + +export interface ListModelsResponse { + /** List of models on the current page. */ + models: Model[] + /** Total number of models. */ + totalCount: number +} + +export interface ListNodeTypesRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** Page number to return. */ + page?: number + /** Maximum number of node types to return per page. */ + pageSize?: number + /** Include disabled node types in the response. 
*/ + includeDisabledTypes: boolean +} + +export interface ListNodeTypesResponse { + /** List of node types. */ + nodeTypes: NodeType[] + /** Total number of node types. */ + totalCount: number +} + +export interface UpdateDeploymentRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the deployment to update. */ + deploymentId: string + /** Name of the deployment. */ + name?: string + /** List of tags to apply to the deployment. */ + tags?: string[] + /** Defines the new minimum size of the pool. */ + minSize?: number + /** Defines the new maximum size of the pool. */ + maxSize?: number +} + +export interface UpdateEndpointRequest { + /** + * Region to target. If none is passed will use default region from the + * config. + */ + region?: ScwRegion + /** ID of the endpoint to update. */ + endpointId: string + /** + * By default, deployments are protected by IAM authentication. When setting + * this field to true, the authentication will be disabled. + */ + disableAuth?: boolean +} diff --git a/packages/clients/src/api/inference/v1/validation-rules.gen.ts b/packages/clients/src/api/inference/v1/validation-rules.gen.ts new file mode 100644 index 000000000..ff2f8f7df --- /dev/null +++ b/packages/clients/src/api/inference/v1/validation-rules.gen.ts @@ -0,0 +1,65 @@ +// This file was automatically generated. DO NOT EDIT. +// If you have any remark or suggestion do not hesitate to open an issue. 
+ /** Constraints declared for CreateDeploymentRequest fields: maxSize and minSize each from 1 to 50 inclusive; name of 1 to 255 characters limited to ASCII letters, digits, hyphen and underscore; nodeTypeName of 1 to 64 characters. How these rules are enforced is not visible in this file. */ +export const CreateDeploymentRequest = { + maxSize: { + greaterThanOrEqual: 1, + lessThanOrEqual: 50, + }, + minSize: { + greaterThanOrEqual: 1, + lessThanOrEqual: 50, + }, + name: { + maxLength: 255, + minLength: 1, + pattern: /^[A-Za-z0-9-_]+$/, + }, + nodeTypeName: { + maxLength: 64, + minLength: 1, + }, +} + /** Constraints declared for CreateModelRequest: name of 1 to 255 characters; its pattern admits slash, dot and colon in addition to ASCII letters, digits, hyphen and underscore. */ +export const CreateModelRequest = { + name: { + maxLength: 255, + minLength: 1, + pattern: /^[A-Za-z0-9-_/.:]+$/, + }, +} + /** Constraint declared for DeploymentQuantization: bits of at most 32; no lower bound is declared here. */ +export const DeploymentQuantization = { + bits: { + lessThanOrEqual: 32, + }, +} + /** Constraint declared for the ListDeploymentsRequest name filter: 1 to 255 characters. */ +export const ListDeploymentsRequest = { + name: { + maxLength: 255, + minLength: 1, + }, +} + /** Constraint declared for the ListModelsRequest name filter: 1 to 255 characters. */ +export const ListModelsRequest = { + name: { + maxLength: 255, + minLength: 1, + }, +} + /** Constraints declared for UpdateDeploymentRequest: maxSize and minSize each from 1 to 50 inclusive, matching CreateDeploymentRequest; name of 1 to 255 characters. NOTE(review): unlike CreateDeploymentRequest, no pattern is declared for name; confirm this is intended. */ +export const UpdateDeploymentRequest = { + maxSize: { + greaterThanOrEqual: 1, + lessThanOrEqual: 50, + }, + minSize: { + greaterThanOrEqual: 1, + lessThanOrEqual: 50, + }, + name: { + maxLength: 255, + minLength: 1, + }, +}