diff --git a/README.md b/README.md index a2a3390..407ef24 100644 --- a/README.md +++ b/README.md @@ -157,13 +157,10 @@ $ uv run python -m examples.openai-sync-chat ## Update typesgen.py +The `typesgen.py` module is generated from `aiapi.yaml`. +The `tools/sync-api.py` script downloads the latest YAML file, patches it if +necessary, and regenerates `typesgen.py`. + ```bash -$ uv run datamodel-codegen \ - --input tests/api-mocks/aiapi.yaml \ - --input-file-type openapi \ - --output scope3ai/api/typesgen.py \ - --output-model-type pydantic_v2.BaseModel \ - --use-schema-description \ - --allow-extra-fields \ - && uv run ruff format scope3ai/api/typesgen.py +$ uv run tools/sync-api.py ``` diff --git a/examples/api-async.py b/examples/api-async.py index 58aed0a..54d606d 100644 --- a/examples/api-async.py +++ b/examples/api-async.py @@ -24,10 +24,10 @@ async def list_gpus(): async def send_impact(): - from scope3ai.api.types import ImpactRow, Model + from scope3ai.api.types import ImpactRow print("Sending impact") - impact = ImpactRow(model=Model(id="gpt_4o"), input_tokens=100, output_tokens=100) + impact = ImpactRow(model_id="gpt_4o", input_tokens=100, output_tokens=100) response = await client.impact(rows=[impact]) print(response) diff --git a/examples/api-sync.py b/examples/api-sync.py index 50d78a7..b32c559 100644 --- a/examples/api-sync.py +++ b/examples/api-sync.py @@ -24,10 +24,10 @@ def list_gpus(): def send_impact(): - from scope3ai.api.types import ImpactRow, Model + from scope3ai.api.types import ImpactRow print("Sending impact") - impact = ImpactRow(model=Model(id="gpt_4o"), input_tokens=100, output_tokens=100) + impact = ImpactRow(model_id="gpt_4o", input_tokens=100, output_tokens=100) response = client.impact(rows=[impact]) print(response) diff --git a/scope3ai/api/client.py b/scope3ai/api/client.py index 6d22d9c..e5496fc 100644 --- a/scope3ai/api/client.py +++ b/scope3ai/api/client.py @@ -6,15 +6,13 @@ from .defaults import DEFAULT_API_URL from .types import ( - GPU, + Family, + GPUResponse, ImpactRequest, - ImpactRow, ImpactResponse, - Family, + ImpactRow, ModelResponse, - CloudProvider, NodeResponse, - ManagedServiceProvider, ) @@ -73,31 +71,40 @@ def model( def gpu( self, with_response: Optional[bool] = True, - ) -> GPU: + ) -> GPUResponse: """ List GPUs """ return self.execute_request( "/gpu", method="GET", - response_model=GPU, + response_model=GPUResponse, with_response=with_response, ) def node( self, - service: Optional[ManagedServiceProvider] = None, - cloud: Optional[CloudProvider] = None, + service: Optional[str] = None, + cloud: Optional[str] = None, + custom: Optional[bool] = None, + gpu: Optional[str] = None, + instance: Optional[str] = None, with_response: Optional[bool] = True, ) -> NodeResponse: """ List nodes """ params = {} - if service: - params["service"] = service.value - if cloud: - params["cloud"] = cloud.value + if service is not None: + params["service"] = service + if cloud is not None: + params["cloud"] = cloud + if custom is not None: + params["custom"] = custom + if gpu is not None: + params["gpu"] = gpu + if instance is not None: + params["instance"] = instance return self.execute_request( "/node", method="GET", diff --git a/scope3ai/api/tracer.py b/scope3ai/api/tracer.py index c217991..51a4c75 100644 --- a/scope3ai/api/tracer.py +++ b/scope3ai/api/tracer.py @@ -1,4 +1,5 @@ from typing import List, Optional + from .typesgen import ImpactResponse, ModeledRow diff --git a/scope3ai/api/types.py b/scope3ai/api/types.py index 7e44465..453da36 100644 --- 
a/scope3ai/api/types.py +++ b/scope3ai/api/types.py @@ -8,7 +8,6 @@ from .tracer import Tracer from .typesgen import ( GPU, - CloudProvider, CountryCode, DataType, DebugInfo, @@ -21,13 +20,10 @@ ImpactBigQueryError, ImpactBigQueryRequest, ImpactBigQueryResponse, - ImpactLogRequest, - ImpactLogRow, ImpactMetrics, ImpactRequest, ImpactResponse, ImpactRow, - ManagedServiceProvider, Model, ModeledRow, ModelResponse, @@ -93,22 +89,18 @@ async def await_impact(self, timeout: Optional[float] = None): "Details", "Error", "GPU", - "ManagedServiceProvider", "Image", - "CloudProvider", "Task", "Family", "DataType", "CountryCode", "RegionCode", "GPUResponse", - "ImpactLogRow", "Model", "GridMix", "Node", "ModelResponse", "NodeResponse", - "ImpactLogRequest", "ImpactRow", "DebugInfo", "ImpactRequest", diff --git a/scope3ai/api/typesgen.py b/scope3ai/api/typesgen.py index f92ecdc..ecdec6b 100644 --- a/scope3ai/api/typesgen.py +++ b/scope3ai/api/typesgen.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: aiapi.yaml -# timestamp: 2024-12-26T16:55:15+00:00 +# timestamp: 2025-01-16T00:04:03+00:00 from __future__ import annotations @@ -8,77 +8,186 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, RootModel, confloat, conint, constr +from pydantic import BaseModel, ConfigDict, Field, RootModel +from typing_extensions import Annotated class StatusResponse(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) ready: bool reason: Optional[str] = None +class NodeCreateRequest(BaseModel): + """ + Create a new node. + Note on permissions: + - cloud_instance_id and managed_service_id can only be set by admins or users who own those resources + - Custom nodes are visible only to their owners + - Global nodes are visible to all users + - Admins can see and manage all nodes + + """ + + id: Annotated[ + str, + Field( + examples=["my-custom-node-1"], + max_length=64, + min_length=3, + pattern="^[a-z0-9-]+$", + ), + ] + cloud_id: Annotated[Optional[str], Field(examples=["aws"])] = None + cloud_instance_id: Annotated[Optional[str], Field(examples=["a2-highgpu-1g"])] = ( + None + ) + managed_service_id: Annotated[Optional[str], Field(examples=["aws-bedrock"])] = None + gpu_id: Annotated[str, Field(examples=["a100_40gb"])] + gpu_count: Annotated[int, Field(examples=[8], ge=0, le=10000)] + cpu_count: Annotated[int, Field(examples=[2], ge=1, le=10000)] + idle_power_w_ex_gpu: Annotated[ + Optional[float], Field(examples=[100], ge=0.0, le=10000.0) + ] = None + average_utilization_rate: Annotated[ + Optional[float], Field(examples=[0.8], ge=0.0, le=1.0) + ] = None + embodied_emissions_kgco2e_ex_gpu: Annotated[ + Optional[float], Field(examples=[2500], ge=0.0, le=100000.0) + ] = None + embodied_water_l_ex_gpu: Annotated[ + Optional[float], Field(examples=[2500], ge=0.0, le=100000.0) + ] = None + use_life_years: Annotated[Optional[float], Field(examples=[5], ge=1.0, le=30.0)] = ( + None + ) + + +class NodeUpdateRequest(BaseModel): + """ + Update an existing node. 
+ - Id can not be updated + Note on permissions: + - cloud_instance_id and managed_service_id can only be set by admins or users who own those resources + - Custom nodes are visible only to their owners + - Global nodes are visible to all users + - Admins can see and manage all nodes + + """ + + cloud_id: Annotated[Optional[str], Field(examples=["aws"])] = None + cloud_instance_id: Annotated[Optional[str], Field(examples=["a2-highgpu-1g"])] = ( + None + ) + managed_service_id: Annotated[Optional[str], Field(examples=["aws-bedrock"])] = None + gpu_id: Annotated[Optional[str], Field(examples=["a100_40gb"])] = None + gpu_count: Annotated[Optional[int], Field(examples=[8], ge=0, le=10000)] = None + cpu_count: Annotated[Optional[int], Field(examples=[2], ge=1, le=10000)] = None + idle_power_w_ex_gpu: Annotated[ + Optional[float], Field(examples=[100], ge=0.0, le=10000.0) + ] = None + average_utilization_rate: Annotated[ + Optional[float], Field(examples=[0.8], ge=0.0, le=1.0) + ] = None + embodied_emissions_kgco2e_ex_gpu: Annotated[ + Optional[float], Field(examples=[2500], ge=0.0, le=100000.0) + ] = None + embodied_water_l_ex_gpu: Annotated[ + Optional[float], Field(examples=[2500], ge=0.0, le=100000.0) + ] = None + use_life_years: Annotated[Optional[float], Field(examples=[5], ge=1.0, le=30.0)] = ( + None + ) + + +class Call(RootModel[List[Union[str, int]]]): + root: Annotated[ + List[Union[str, int]], + Field( + description="Array of function call parameters in this exact order:\n model_id STRING, model_family STRING, model_name STRING,\n model_hugging_face_path STRING, request_time TIMESTAMP,\n node_id STRING, cloud_id STRING, cloud_region STRING,\n cloud_instance_id STRING, managed_service_id STRING,\n country STRING, region STRING, task STRING, input_tokens INT64,\n output_tokens INT64, input_images STRING, output_images STRING,\n output_video_resolution INT64,\n output_video_frames INT64, input_audio_seconds INT64, input_steps INT64\n", + examples=[ + [ + "gpt-4-turbo", + "2024-03-15T10:30:00Z", + "us-central1", + "US", + "CA", + "text-generation", + 100, + 50, + None, + ] + ], + max_length=21, + min_length=17, + ), + ] + + class ImpactBigQueryRequest(BaseModel): - model_config = ConfigDict( - extra="allow", - ) - requestId: str = Field( - ..., description="Unique identifier for the request", examples=["124ab1c"] - ) - caller: str = Field( - ..., - description="Full resource name of the BigQuery job", - examples=[ - "//bigquery.googleapis.com/projects/myproject/jobs/myproject:US.bquxjob_5b4c112c_17961fafeaf" - ], - ) - sessionUser: str = Field( - ..., - description="Email of the user executing the BigQuery query", - examples=["user@company.com"], - ) - userDefinedContext: Optional[Dict[str, Any]] = Field( - None, description="User-defined context from BigQuery" - ) - calls: List[List[Union[str, int]]] + requestId: Annotated[ + str, + Field(description="Unique identifier for the request", examples=["124ab1c"]), + ] + caller: Annotated[ + str, + Field( + description="Full resource name of the BigQuery job", + examples=[ + "//bigquery.googleapis.com/projects/myproject/jobs/myproject:US.bquxjob_5b4c112c_17961fafeaf" + ], + ), + ] + sessionUser: Annotated[ + str, + Field( + description="Email of the user executing the BigQuery query", + examples=["user@company.com"], + ), + ] + userDefinedContext: Annotated[ + Optional[Dict[str, Any]], + Field(description="User-defined context from BigQuery"), + ] = None + calls: List[Call] class ImpactBigQueryResponse(BaseModel): - model_config = ConfigDict( 
- extra="allow", - ) - replies: List[str] = Field( - ..., description="Array of impact metric results", max_length=1000, min_length=0 - ) + replies: Annotated[ + List[str], + Field( + description="Array of impact metric results", max_length=1000, min_length=0 + ), + ] errorMessage: Optional[str] = None class ImpactBigQueryError(BaseModel): - model_config = ConfigDict( - extra="allow", - ) - errorMessage: str = Field( - ..., - description="Error message for BigQuery", - examples=["Invalid request format: missing required field 'calls'"], - ) + errorMessage: Annotated[ + str, + Field( + description="Error message for BigQuery", + examples=["Invalid request format: missing required field 'calls'"], + ), + ] class ImpactMetrics(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) - usage_energy_wh: float = Field(..., examples=[0.13]) - usage_emissions_gco2e: float = Field(..., examples=[0.81]) - usage_water_ml: float = Field(..., examples=[1.32]) - embodied_emissions_gco2e: float = Field(..., examples=[0.81]) - embodied_water_ml: float = Field(..., examples=[1.32]) + usage_energy_wh: Annotated[float, Field(examples=[0.13])] + usage_emissions_gco2e: Annotated[float, Field(examples=[0.81])] + usage_water_ml: Annotated[float, Field(examples=[1.32])] + embodied_emissions_gco2e: Annotated[float, Field(examples=[0.81])] + embodied_water_ml: Annotated[float, Field(examples=[1.32])] class PredictionStep(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) description: str duration_ms: float @@ -86,52 +195,52 @@ class PredictionStep(BaseModel): class Details(BaseModel): - model_config = ConfigDict( - extra="allow", - ) reason: Optional[str] = None field: Optional[str] = None class Error(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) code: Optional[str] = None message: str details: Optional[Details] = None +class Node(NodeCreateRequest): + customer_id: Annotated[ + Optional[Any], + Field( + description="ID of the customer who owns this node (visible to admins only)" + ), + ] = None + created_at: datetime + updated_at: datetime + created_by: Annotated[ + Optional[str], + Field(description="ID of the user who created the node (admin or owner only)"), + ] = None + + class GPU(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) - name: Optional[str] = Field(None, examples=["NVIDIA A100 40GB"]) - id: Optional[str] = Field(None, examples=["a100_40gb"]) - max_power_w: Optional[float] = Field(None, examples=[700]) - embodied_emissions_kgco2e: Optional[float] = Field(None, examples=[282.1]) - embodied_water_mlh2o: Optional[float] = Field(None, examples=[181.1]) - performance_ratio_to_h200: Optional[float] = Field(None, examples=[1.5]) - + name: Annotated[Optional[str], Field(examples=["NVIDIA A100 40GB"])] = None + id: Annotated[str, Field(examples=["a100_40gb"])] + max_power_w: Annotated[float, Field(examples=[700])] + embodied_emissions_kgco2e: Annotated[float, Field(examples=[282.1])] + embodied_water_mlh2o: Annotated[float, Field(examples=[181.1])] + performance_ratio_to_h200: Annotated[float, Field(examples=[1.5])] + ols_coefficient_gpu_count: Annotated[float, Field(examples=[11.4])] + ols_intercept: Annotated[float, Field(examples=[11.4])] -class ManagedServiceProvider(Enum): - aws_bedrock = "aws-bedrock" - azure_ml = "azure-ml" - google_vertex = "google-vertex" - ibm_watson = "ibm-watson" - hugging_face = "hugging-face" - -class Image(RootModel[constr(pattern=r"^\d{1,4}x\d{1,4}$")]): - root: 
constr(pattern=r"^\d{1,4}x\d{1,4}$") = Field(..., examples=["1024x1024"]) - - -class CloudProvider(Enum): - aws = "aws" - azure = "azure" - gcp = "gcp" - oracle = "oracle" - ibm = "ibm" +class Image(RootModel[str]): + root: Annotated[ + str, Field(examples=["1024x1024"], pattern="^(\\d{1,4})x(\\d{1,4})$") + ] class Task(Enum): @@ -239,324 +348,317 @@ class DataType(Enum): uint64 = "uint64" -class CountryCode(RootModel[constr(pattern=r"^[A-Z]{2}$", min_length=2, max_length=2)]): - root: constr(pattern=r"^[A-Z]{2}$", min_length=2, max_length=2) = Field( - ..., - description="Two-letter country code as defined by ISO 3166-1 alpha-2", - examples=["US"], - ) +class CountryCode(RootModel[str]): + root: Annotated[ + str, + Field( + description="Two-letter country code as defined by ISO 3166-1 alpha-2", + examples=["US"], + max_length=2, + min_length=2, + pattern="^[A-Z]{2}$", + ), + ] + + +class RegionCode(RootModel[str]): + root: Annotated[ + str, + Field( + description="Two-letter region code as defined by ISO 3166-1 alpha-2", + examples=["NY"], + max_length=2, + min_length=2, + pattern="^[A-Z]{2}$", + ), + ] -class RegionCode(RootModel[constr(pattern=r"^[A-Z]{2}$", min_length=2, max_length=2)]): - root: constr(pattern=r"^[A-Z]{2}$", min_length=2, max_length=2) = Field( - ..., - description="Two-letter region code as defined by ISO 3166-1 alpha-2", - examples=["NY"], +class NodeResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", ) + nodes: Annotated[List[Node], Field(max_length=100)] class GPUResponse(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) - gpus: List[GPU] = Field(..., max_length=100) + gpus: Annotated[List[GPU], Field(max_length=100)] -class ImpactLogRow(BaseModel): +class ImpactRow(BaseModel): model_config = ConfigDict( - extra="allow", - ) - start_time_utc: Optional[datetime] = Field( - None, - description="The start time of the inference", - examples=["2024-10-01T00:00:00Z"], - ) - request_duration_ms: Optional[float] = Field( - None, - description="The time the request took (as measured by client or proxy)", - examples=[283], - ) - processing_duration_ms: Optional[float] = Field( - None, - description="The time taken in processing the request (as measured at execution)", - examples=[238], - ) - integration_source: Optional[str] = Field( - None, - description="The integration used to source the data", - examples=["litellm"], - ) - client_id: Optional[str] = Field( - None, description="The client to attribute this call to" - ) - project_id: Optional[str] = Field( - None, description="The project to attribute this call to" - ) - application_id: Optional[str] = Field( - None, description="The application to attribute this call to" - ) - session_id: Optional[str] = Field( - None, description="The ID of the session (multiple requests)" + extra="forbid", ) - request_id: Optional[str] = Field( - None, description="The unique identifier of this request" - ) - environment: Optional[str] = Field( - None, - description="Environment (prod/production indicates production)", - examples=["staging"], - ) - model_id: Optional[str] = Field( - None, description="The ID of the model requested", examples=["llama_31_8b"] - ) - model_id_used: Optional[str] = Field( - None, - description="The ID of the model that did the inference", - examples=["llama_31_8b_0125"], - ) - model_name: Optional[str] = Field( - None, description="The name of the model", examples=["LLaMa v3.1 8B"] - ) - model_family: Optional[str] = Field( - None, description="The family of the 
model", examples=["llama"] - ) - model_hugging_face_path: Optional[str] = Field( - None, - description="The Hugging Face path of the model", - examples=["meta/llama31_8b"], - ) - cloud_id: Optional[str] = Field( - None, description="The ID of the cloud", examples=["aws"] - ) - cloud_region: Optional[str] = Field( - None, description="The region of cloud hosting", examples=["us-central1"] - ) - cloud_instance_id: Optional[str] = Field( - None, description="The instance type in the cloud", examples=["xl-4g-8a100"] - ) - managed_service_id: Optional[str] = Field( - None, - description="The ID of a managed service provider", - examples=["aws-bedrock"], - ) - node_id: Optional[str] = Field( - None, description="The ID of a proprietary node", examples=["h200-2024-build"] - ) - node_country: Optional[ - constr(pattern=r"^[A-Z]{2}$", min_length=2, max_length=2) - ] = Field( - None, description="The country where the servers are hosted", examples=["US"] - ) - node_region: Optional[constr(pattern=r"^[A-Z]{2}$", min_length=2, max_length=2)] = ( + utc_datetime: Annotated[ + Optional[datetime], Field( - None, description="The region where the servers are hosted", examples=["CA"] - ) - ) - task: Optional[Task] = Field( - None, description="The task the AI is performing", examples=["text-generation"] - ) - input_tokens: Optional[conint(ge=0, le=100000000)] = Field( - None, description="the number of input tokens", examples=[1033] - ) - output_tokens: Optional[conint(ge=0, le=100000000)] = Field( - None, description="the number of output tokens", examples=[2300] - ) - input_audio_seconds: Optional[conint(ge=0, le=100000)] = Field( - None, description="the duration of audio input in seconds", examples=[60] - ) - output_audio_seconds: Optional[conint(ge=0, le=100000)] = Field( - None, description="the duration of audio output in seconds", examples=[60] - ) - input_images: Optional[str] = Field( - None, - description="a comma delimited list of image sizes", - examples=["512x512,1024x1024"], - ) - input_steps: Optional[conint(ge=1, le=10000)] = Field( - None, description="the number of steps in the model", examples=[50] - ) - output_images: Optional[str] = Field( - None, - description="a comma delimited list of output sizes", - examples=["512x512,1024x1024"], - ) - output_video_frames: Optional[conint(ge=0, le=100000000)] = Field( - None, - description="the number of video frames (frame rate x duration)", - examples=[60], - ) - output_video_resolution: Optional[int] = Field( - None, - description="the resolution of the video in number of lines (for instance, 1080 for 1080p)", - examples=[1080], - ) - request_cost: Optional[float] = Field(None, description="the cost of this request") - currency: Optional[str] = Field(None, description="the currency for cost data") + description="The start time of the request in UTC", + examples=["2022-01-01T00:00:00Z"], + ), + ] = None + request_duration_ms: Annotated[ + Optional[float], + Field( + description="The time the request took (as measured by client or proxy)", + examples=[283], + ), + ] = None + processing_duration_ms: Annotated[ + Optional[float], + Field( + description="The time taken in processing the request (as measured at execution)", + examples=[238], + ), + ] = None + request_cost: Annotated[ + Optional[float], + Field(description="The cost to execute this request", examples=[0.18]), + ] = None + currency: Annotated[ + Optional[str], + Field(description="The currency for the cost data", examples=["USD"]), + ] = None + integration_source: Annotated[ + 
Optional[str], + Field( + description="The integration used to source the data", examples=["litellm"] + ), + ] = None + environment: Annotated[ + Optional[str], + Field( + description="Environment (prod/production indicates production)", + examples=["staging"], + ), + ] = None + session_id: Annotated[ + Optional[str], Field(description="The ID of the session (multiple requests)") + ] = None + trace_id: Annotated[ + Optional[str], + Field( + description="The trace ID of the request (multiple requests in one task)" + ), + ] = None + request_id: Annotated[ + Optional[str], Field(description="The unique identifier of this request") + ] = None + client_id: Annotated[ + Optional[str], Field(description="The client to attribute this call to") + ] = None + project_id: Annotated[ + Optional[str], Field(description="The project to attribute this call to") + ] = None + application_id: Annotated[ + Optional[str], Field(description="The application to attribute this call to") + ] = None + model_id: Annotated[ + Optional[str], + Field(description="The ID of the model requested", examples=["llama_31_8b"]), + ] = None + model_family: Annotated[ + Optional[str], Field(description="The family of the model", examples=["llama"]) + ] = None + model_name: Annotated[ + Optional[str], + Field(description="The name of the model", examples=["LLaMa v3.1 8B"]), + ] = None + model_hugging_face_path: Annotated[ + Optional[str], + Field( + description="The Hugging Face path of the model", + examples=["meta/llama31_8b"], + ), + ] = None + model_used_id: Annotated[ + Optional[str], + Field( + description="The ID of the model that did the inference", + examples=["llama_31_8b_0125"], + ), + ] = None + cloud_region: Annotated[ + Optional[str], + Field(description="The region of cloud hosting", examples=["us-central1"]), + ] = None + managed_service_id: Annotated[ + Optional[str], + Field( + description="The ID of a managed service provider", examples=["aws-bedrock"] + ), + ] = None + cloud_id: Annotated[ + Optional[str], Field(description="The ID of the cloud", examples=["aws"]) + ] = None + cloud_instance_id: Annotated[ + Optional[str], + Field(description="The instance type in the cloud", examples=["xl-4g-8a100"]), + ] = None + node_id: Annotated[ + Optional[str], + Field( + description="The ID of a custom or global node", + examples=["h200-2024-build"], + ), + ] = None + country: Optional[CountryCode] = None + region: Optional[RegionCode] = None + task: Annotated[Optional[Task], Field(examples=["text-generation"])] = None + input_tokens: Annotated[ + Optional[int], + Field( + description="the number of input (or prompt) tokens", + examples=[128], + ge=0, + le=100000000, + ), + ] = None + input_audio_seconds: Annotated[ + Optional[int], + Field( + description="the duration of audio input in seconds", + examples=[60], + ge=0, + le=100000, + ), + ] = None + output_tokens: Annotated[ + Optional[int], + Field( + description="the number of output (or completion) tokens", + examples=[128], + ge=0, + le=100000000, + ), + ] = None + input_images: Annotated[Optional[List[Image]], Field(max_length=100)] = None + input_steps: Annotated[ + Optional[int], + Field( + description="the number of steps to use in the model", + examples=[50], + ge=1, + le=10000, + ), + ] = None + output_images: Annotated[ + Optional[List[Image]], + Field(description="a list of output image sizes", max_length=100), + ] = None + output_audio_seconds: Annotated[ + Optional[float], + Field( + description="the duration of audio output in seconds", + 
examples=[60], + ge=0.0, + le=100000.0, + ), + ] = None + output_audio_tokens: Annotated[ + Optional[int], + Field( + description="the number of audio tokens in the output", + examples=[2300], + ge=0, + le=100000000, + ), + ] = None + output_video_frames: Annotated[ + Optional[int], + Field( + description="the number of video frames (frame rate x duration)", + examples=[60], + ge=0, + le=100000000, + ), + ] = None + output_video_resolution: Annotated[ + Optional[int], + Field( + description="the resolution of the video in number of lines (for instance, 1080 for 1080p)", + examples=[1080], + ), + ] = None class Model(BaseModel): model_config = ConfigDict( - extra="allow", - ) - id: Optional[str] = Field(None, examples=["gpt-4-turbo"]) - name: Optional[str] = Field(None, examples=["GPT-4 Turbo"]) - family: Optional[str] = Field(None, examples=["gpt"]) - hugging_face_path: Optional[str] = Field(None, examples=["EleutherAI/gpt-neo-2.7B"]) - benchmark_model_id: Optional[str] = Field(None, examples=["GPTJ-6B"]) - total_params_billions: Optional[float] = Field(None, examples=[175]) - number_of_experts: Optional[int] = Field(None, examples=[7]) - params_per_expert_billions: Optional[float] = Field(None, examples=[8]) - tensor_parallelism: Optional[int] = Field(None, examples=[1]) - datatype: Optional[DataType] = Field(None, examples=["fp8"]) - task: Optional[Task] = Field(None, examples=["text-generation"]) - training_usage_energy_kwh: Optional[float] = Field(None, examples=[1013.1]) - training_usage_emissions_kgco2e: Optional[float] = Field(None, examples=[1013.1]) - training_usage_water_l: Optional[float] = Field(None, examples=[1013.1]) - training_embodied_emissions_kgco2e: Optional[float] = Field( - None, examples=[11013.1] - ) - training_embodied_water_l: Optional[float] = Field(None, examples=[11013.1]) - estimated_use_life_days: Optional[float] = Field(None, examples=[1013.1]) - estimated_requests_per_day: Optional[float] = Field(None, examples=[1013.1]) - fine_tuned_from_model_id: Optional[str] = Field(None, examples=["llama_31_8b"]) + extra="forbid", + ) + id: Annotated[str, Field(examples=["gpt-4-turbo"])] + name: Annotated[Optional[str], Field(examples=["GPT-4 Turbo"])] = None + family: Annotated[Optional[str], Field(examples=["gpt"])] = None + hugging_face_path: Annotated[ + Optional[str], Field(examples=["EleutherAI/gpt-neo-2.7B"]) + ] = None + benchmark_model_id: Annotated[Optional[str], Field(examples=["GPTJ-6B"])] = None + total_params_billions: Annotated[Optional[float], Field(examples=[175])] = None + number_of_experts: Annotated[Optional[int], Field(examples=[7])] = None + params_per_expert_billions: Annotated[Optional[float], Field(examples=[8])] = None + tensor_parallelism: Annotated[Optional[int], Field(examples=[1])] = None + datatype: Annotated[Optional[DataType], Field(examples=["fp8"])] = None + task: Annotated[Optional[Task], Field(examples=["text-generation"])] = None + training_usage_energy_kwh: Annotated[Optional[float], Field(examples=[1013.1])] = ( + None + ) + training_usage_emissions_kgco2e: Annotated[ + Optional[float], Field(examples=[1013.1]) + ] = None + training_usage_water_l: Annotated[Optional[float], Field(examples=[1013.1])] = None + training_embodied_emissions_kgco2e: Annotated[ + Optional[float], Field(examples=[11013.1]) + ] = None + training_embodied_water_l: Annotated[Optional[float], Field(examples=[11013.1])] = ( + None + ) + estimated_use_life_days: Annotated[Optional[float], Field(examples=[1013.1])] = None + estimated_requests_per_day: 
Annotated[Optional[float], Field(examples=[1013.1])] = ( + None + ) + fine_tuned_from_model_id: Annotated[ + Optional[str], Field(examples=["llama_31_8b"]) + ] = None class GridMix(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) country: CountryCode region: Optional[RegionCode] = None - gco2e_per_kwh: confloat(ge=0.0, le=2000.0) = Field(..., examples=[475]) - - -class Node(BaseModel): - model_config = ConfigDict( - extra="allow", - ) - id: Optional[str] = Field(None, examples=["base-node-xl"]) - cloud_id: Optional[str] = Field(None, examples=["aws"]) - cloud_instance_id: Optional[str] = Field(None, examples=["a2-highgpu-1g"]) - managed_service_id: Optional[str] = Field(None, examples=["aws-bedrock"]) - gpu_id: Optional[str] = Field(None, examples=["a100_40gb"]) - gpu: Optional[GPU] = None - gpu_count: Optional[conint(ge=0, le=10000)] = Field(None, examples=[8]) - cpu_count: Optional[conint(ge=1, le=10000)] = Field(None, examples=[2]) - idle_power_w_ex_gpu: Optional[confloat(ge=0.0, le=10000.0)] = Field( - None, examples=[100] - ) - average_utilization_rate: Optional[confloat(ge=0.0, le=1.0)] = Field( - None, examples=[0.8] - ) - embodied_emissions_kgco2e_ex_gpu: Optional[confloat(ge=0.0, le=100000.0)] = Field( - None, examples=[2500] - ) - embodied_water_l_ex_gpu: Optional[confloat(ge=0.0, le=100000.0)] = Field( - None, examples=[2500] - ) - use_life_years: Optional[confloat(ge=1.0, le=30.0)] = Field(None, examples=[5]) + gco2e_per_kwh: Annotated[float, Field(examples=[475], ge=0.0, le=2000.0)] class ModelResponse(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) - models: List[Model] = Field(..., max_length=100) + models: Annotated[List[Model], Field(max_length=100)] -class NodeResponse(BaseModel): - model_config = ConfigDict( - extra="allow", - ) - nodes: List[Node] = Field(..., max_length=100) - - -class ImpactLogRequest(BaseModel): +class ImpactRequest(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) - rows: List[ImpactLogRow] = Field(..., max_length=1000) - - -class ImpactRow(BaseModel): - model_config = ConfigDict( - extra="allow", - ) - utc_datetime: Optional[datetime] = Field( - None, - description="The start time of the request in UTC", - examples=["2022-01-01T00:00:00Z"], - ) - model: Model - cloud_region: Optional[str] = Field( - None, description="The region of cloud hosting", examples=["us-central1"] - ) - node: Optional[Node] = None - country: Optional[CountryCode] = None - region: Optional[RegionCode] = None - task: Optional[Task] = Field(None, examples=["text-generation"]) - input_tokens: Optional[conint(ge=0, le=100000000)] = Field( - None, description="the number of input (or prompt) tokens", examples=[128] - ) - input_audio_seconds: Optional[conint(ge=0, le=100000)] = Field( - None, description="the duration of audio input in seconds", examples=[60] - ) - output_audio_seconds: Optional[conint(ge=0, le=100000)] = Field( - None, description="the duration of audio output in seconds", examples=[60] - ) - output_tokens: Optional[conint(ge=0, le=100000000)] = Field( - None, description="the number of output (or completion) tokens", examples=[128] - ) - input_images: Optional[List[Image]] = Field(None, max_length=100) - input_steps: Optional[conint(ge=1, le=10000)] = Field( - None, description="the number of steps to use in the model", examples=[50] - ) - output_images: Optional[List[Image]] = Field( - None, description="a list of output image sizes", max_length=100 - ) - output_video_frames: 
Optional[conint(ge=0, le=100000000)] = Field( - None, - description="the number of video frames (frame rate x duration)", - examples=[60], - ) - output_video_resolution: Optional[int] = Field( - None, - description="the resolution of the video in number of lines (for instance, 1080 for 1080p)", - examples=[1080], - ) - request_duration_ms: Optional[float] = Field( - None, - description="The time the request took (as measured by client or proxy)", - examples=[283], - ) - managed_service_id: Optional[str] = Field( - None, - description="The ID of a managed service provider", - examples=["aws-bedrock"], - ) - model_used: Optional[Model] = None + rows: Annotated[List[ImpactRow], Field(max_length=1000)] class DebugInfo(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) model: Optional[Model] = None hardware_node: Optional[Node] = None grid_mix: Optional[GridMix] = None - steps: Optional[List[PredictionStep]] = Field(None, max_length=100) - - -class ImpactRequest(BaseModel): - model_config = ConfigDict( - extra="allow", - ) - rows: List[ImpactRow] = Field(..., max_length=1000) + steps: Annotated[Optional[List[PredictionStep]], Field(max_length=100)] = None class ModeledRow(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) inference_impact: Optional[ImpactMetrics] = None training_impact: Optional[ImpactMetrics] = None @@ -568,10 +670,10 @@ class ModeledRow(BaseModel): class ImpactResponse(BaseModel): model_config = ConfigDict( - extra="allow", + extra="forbid", ) - rows: List[ModeledRow] = Field(..., max_length=1000) - total_energy_wh: Optional[float] = Field(None, examples=[0.13]) - total_gco2e: Optional[float] = Field(None, examples=[0.81]) - total_mlh2o: Optional[float] = Field(None, examples=[1.32]) - has_errors: bool = Field(..., examples=[False]) + rows: Annotated[List[ModeledRow], Field(max_length=1000)] + total_energy_wh: Annotated[Optional[float], Field(examples=[0.13])] = None + total_gco2e: Annotated[Optional[float], Field(examples=[0.81])] = None + total_mlh2o: Annotated[Optional[float], Field(examples=[1.32])] = None + has_errors: Annotated[bool, Field(examples=[False])] diff --git a/scope3ai/tracers/anthropic/chat.py b/scope3ai/tracers/anthropic/chat.py index 1fa228a..674f8af 100644 --- a/scope3ai/tracers/anthropic/chat.py +++ b/scope3ai/tracers/anthropic/chat.py @@ -17,7 +17,7 @@ from anthropic.types.raw_message_stream_event import RawMessageStreamEvent from typing_extensions import override -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import Scope3AIContext, ImpactRow from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -59,7 +59,7 @@ def __stream_text__(self) -> Iterator[str]: # type: ignore[misc] requests_latency = time.perf_counter() - timer_start if model_name is not None: scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=requests_latency * 1000, @@ -102,7 +102,7 @@ async def __stream_text__(self) -> AsyncIterator[str]: # type: ignore[misc] requests_latency = time.perf_counter() - timer_start if model_name is not None: scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=requests_latency * 1000, @@ -173,7 +173,7 @@ def __stream__(self) -> Iterator[_T]: request_latency = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=model), + 
model_id=model, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=request_latency * 1000, @@ -206,7 +206,7 @@ async def __stream__(self) -> AsyncIterator[_T]: request_latency = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=request_latency * 1000, @@ -225,7 +225,7 @@ def __init__(self, parent) -> None: # noqa: ANN001 def _anthropic_chat_wrapper(response: Message, request_latency: float) -> Message: model_name = response.model scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens, request_duration_ms=request_latency * 1000, @@ -259,7 +259,7 @@ async def _anthropic_async_chat_wrapper( ) -> Message: model_name = response.model scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=response.usage.input_tokens, output_tokens=response.usage.output_tokens, request_duration_ms=request_latency * 1000, diff --git a/scope3ai/tracers/cohere/chat.py b/scope3ai/tracers/cohere/chat.py index 5d0bb1d..a0f8d63 100644 --- a/scope3ai/tracers/cohere/chat.py +++ b/scope3ai/tracers/cohere/chat.py @@ -13,7 +13,7 @@ from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import Scope3AIContext, ImpactRow PROVIDER = PROVIDERS.COHERE.value @@ -43,7 +43,7 @@ def cohere_chat_wrapper( request_latency = time.perf_counter() - timer_start model_name = kwargs.get("model", "command-r") scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=response.meta.tokens.input_tokens, output_tokens=response.meta.tokens.output_tokens, request_duration_ms=request_latency * 1000, @@ -64,7 +64,7 @@ async def cohere_async_chat_wrapper( request_latency = time.perf_counter() - timer_start model_name = kwargs.get("model", "command-r") scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=response.meta.tokens.input_tokens, output_tokens=response.meta.tokens.output_tokens, request_duration_ms=request_latency * 1000, @@ -89,7 +89,7 @@ def cohere_stream_chat_wrapper( input_tokens = event.response.meta.tokens.input_tokens output_tokens = event.response.meta.tokens.output_tokens scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=request_latency * 1000, @@ -116,7 +116,7 @@ async def cohere_async_stream_chat_wrapper( input_tokens = event.response.meta.tokens.input_tokens output_tokens = event.response.meta.tokens.output_tokens scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=request_latency * 1000, diff --git a/scope3ai/tracers/cohere/chat_v2.py b/scope3ai/tracers/cohere/chat_v2.py index 9c2afb5..17a8144 100644 --- a/scope3ai/tracers/cohere/chat_v2.py +++ b/scope3ai/tracers/cohere/chat_v2.py @@ -10,7 +10,7 @@ StreamedChatResponseV2 as _StreamedChatResponseV2, ) -from scope3ai.api.types import ImpactRow, Model, Scope3AIContext +from scope3ai.api.types import ImpactRow, Scope3AIContext from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -46,7 +46,7 @@ def cohere_chat_v2_wrapper( request_latency = time.perf_counter() - timer_start model_name = 
kwargs["model"] scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=response.usage.tokens.input_tokens, output_tokens=response.usage.tokens.output_tokens, request_duration_ms=request_latency * 1000, @@ -67,7 +67,7 @@ async def cohere_async_chat_v2_wrapper( request_latency = time.perf_counter() - timer_start model_name = kwargs["model"] scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=response.usage.tokens.input_tokens, output_tokens=response.usage.tokens.output_tokens, request_duration_ms=request_latency * 1000, @@ -93,7 +93,7 @@ def cohere_stream_chat_v2_wrapper( input_tokens = event.delta.usage.tokens.input_tokens output_tokens = event.delta.usage.tokens.output_tokens scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=request_latency * 1000, @@ -119,7 +119,7 @@ async def cohere_async_stream_chat_v2_wrapper( input_tokens = event.delta.usage.tokens.input_tokens output_tokens = event.delta.usage.tokens.output_tokens scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=input_tokens, output_tokens=output_tokens, request_duration_ms=request_latency * 1000, diff --git a/scope3ai/tracers/huggingface/chat.py b/scope3ai/tracers/huggingface/chat.py index 05e4441..3679b2e 100644 --- a/scope3ai/tracers/huggingface/chat.py +++ b/scope3ai/tracers/huggingface/chat.py @@ -4,12 +4,15 @@ from typing import Any, Callable, Optional, Union import tiktoken -from huggingface_hub import AsyncInferenceClient, InferenceClient # type: ignore[import-untyped] +from huggingface_hub import ( # type: ignore[import-untyped] + AsyncInferenceClient, + InferenceClient, +) from huggingface_hub import ChatCompletionOutput as _ChatCompletionOutput from huggingface_hub import ChatCompletionStreamOutput as _ChatCompletionStreamOutput from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import ImpactRow, Scope3AIContext from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI from scope3ai.response_interceptor.requests_interceptor import requests_response_capture @@ -45,7 +48,7 @@ def huggingface_chat_wrapper_non_stream( with requests_response_capture() as responses: response = wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[0] model = ( instance.model @@ -57,7 +60,7 @@ def huggingface_chat_wrapper_non_stream( else: compute_time = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.completion_tokens, request_duration_ms=float(compute_time) * 1000, @@ -84,7 +87,7 @@ def huggingface_chat_wrapper_stream( token_count += 1 request_latency = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, output_tokens=token_count, request_duration_ms=request_latency * 1000, managed_service_id=PROVIDER, @@ -120,7 +123,7 @@ async def huggingface_async_chat_wrapper_non_stream( encoder = tiktoken.get_encoding("cl100k_base") output_tokens = len(encoder.encode(response.choices[0].message.content)) scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=response.usage.prompt_tokens, output_tokens=output_tokens, request_duration_ms=request_latency * 1000, @@ -140,12 +143,12 @@ 
async def huggingface_async_chat_wrapper_stream( timer_start = time.perf_counter() stream = await wrapped(*args, **kwargs) token_count = 0 - model_used = instance.model or kwargs["model"] + model = instance.model or kwargs["model"] async for chunk in stream: token_count += 1 request_latency = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=model_used), + model_id=model, output_tokens=token_count, request_duration_ms=request_latency * 1000, # TODO: can we get the header that has the processing time diff --git a/scope3ai/tracers/huggingface/image_to_image.py b/scope3ai/tracers/huggingface/image_to_image.py index 99c527a..e712839 100644 --- a/scope3ai/tracers/huggingface/image_to_image.py +++ b/scope3ai/tracers/huggingface/image_to_image.py @@ -4,13 +4,17 @@ from typing import Any, Callable, Optional, Union import tiktoken -from PIL import Image from aiohttp import ClientResponse +from huggingface_hub import ( # type: ignore[import-untyped] + AsyncInferenceClient, + InferenceClient, +) from huggingface_hub import ImageToImageOutput as _ImageToImageOutput -from huggingface_hub import InferenceClient, AsyncInferenceClient # type: ignore[import-untyped] +from PIL import Image from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import ImpactRow, Scope3AIContext +from scope3ai.api.typesgen import Image as RootImage from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -30,15 +34,16 @@ def _hugging_face_image_to_image_wrapper( timer_start: Any, model: Any, response: Any, - http_response: Union[ClientResponse, Response], + http_response: Optional[Union[ClientResponse, Response]], args: Any, kwargs: Any, ) -> ImageToImageOutput: + compute_time = time.perf_counter() - timer_start + input_tokens = 0 if http_response: - compute_time = http_response.headers.get("x-compute-time") + compute_time = http_response.headers.get("x-compute-time") or compute_time input_tokens = http_response.headers.get("x-compute-characters") - else: - compute_time = time.perf_counter() - timer_start + if not input_tokens: encoder = tiktoken.get_encoding("cl100k_base") prompt = args[1] if len(args) > 1 else kwargs.get("prompt", "") input_tokens = len(encoder.encode(prompt)) if prompt != "" else 0 @@ -49,21 +54,18 @@ def _hugging_face_image_to_image_wrapper( else: input_image = Image.open(io.BytesIO(image_param)) input_width, input_height = input_image.size - input_images = [ - ("{width}x{height}".format(width=input_width, height=input_height)) - ] + input_images = [RootImage(root=f"{input_width}x{input_height}")] except Exception: + input_images = [] pass output_width, output_height = response.size scope3_row = ImpactRow( - model=Model(id=model), - input_tokens=input_tokens, + model_id=model, + input_tokens=int(input_tokens), task=Task.image_generation, request_duration_ms=float(compute_time) * 1000, managed_service_id=PROVIDER, - output_images=[ - "{width}x{height}".format(width=output_width, height=output_height) - ], + output_images=[RootImage(root=f"{output_width}x{output_height}")], input_images=input_images, ) @@ -81,7 +83,7 @@ def huggingface_image_to_image_wrapper( with requests_response_capture() as responses: response = wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_IMAGE_TO_IMAGE_TASK @@ 
-99,7 +101,7 @@ async def huggingface_image_to_image_wrapper_async( with aiohttp_response_capture() as responses: response = await wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_IMAGE_TO_IMAGE_TASK diff --git a/scope3ai/tracers/huggingface/speech_to_text.py b/scope3ai/tracers/huggingface/speech_to_text.py index 11952c5..d768ee6 100644 --- a/scope3ai/tracers/huggingface/speech_to_text.py +++ b/scope3ai/tracers/huggingface/speech_to_text.py @@ -1,16 +1,18 @@ import time -from dataclasses import dataclass, asdict +from dataclasses import asdict, dataclass from typing import Any, Callable, Optional, Union from aiohttp import ClientResponse from huggingface_hub import ( - AutomaticSpeechRecognitionOutput as _AutomaticSpeechRecognitionOutput, AsyncInferenceClient, + InferenceClient, # type: ignore[import-untyped] +) +from huggingface_hub import ( + AutomaticSpeechRecognitionOutput as _AutomaticSpeechRecognitionOutput, ) -from huggingface_hub import InferenceClient # type: ignore[import-untyped] from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import ImpactRow, Scope3AIContext from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -30,7 +32,7 @@ def _hugging_face_automatic_recognition_wrapper( timer_start: Any, model: Any, response: Any, - http_response: Union[ClientResponse, Response], + http_response: Optional[Union[ClientResponse, Response]], args: Any, kwargs: Any, ) -> AutomaticSpeechRecognitionOutput: @@ -43,7 +45,7 @@ def _hugging_face_automatic_recognition_wrapper( if not compute_audio_length: compute_audio_length = 0 scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, task=Task.text_to_speech, input_audio_seconds=int(float(compute_audio_length)), request_duration_ms=float(compute_time) * 1000, @@ -64,7 +66,7 @@ def huggingface_automatic_recognition_output_wrapper( with requests_response_capture() as responses: response = wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_SPEECH_TO_TEXT_TASK @@ -82,7 +84,7 @@ async def huggingface_automatic_recognition_output_wrapper_async( with aiohttp_response_capture() as responses: response = await wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_SPEECH_TO_TEXT_TASK diff --git a/scope3ai/tracers/huggingface/text_to_image.py b/scope3ai/tracers/huggingface/text_to_image.py index fedafe3..d6e2ad1 100644 --- a/scope3ai/tracers/huggingface/text_to_image.py +++ b/scope3ai/tracers/huggingface/text_to_image.py @@ -4,11 +4,15 @@ import tiktoken from aiohttp import ClientResponse -from huggingface_hub import InferenceClient, AsyncInferenceClient # type: ignore[import-untyped] +from huggingface_hub import ( # type: ignore[import-untyped] + AsyncInferenceClient, + InferenceClient, +) from huggingface_hub import TextToImageOutput as _TextToImageOutput from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import ImpactRow, Scope3AIContext +from 
scope3ai.api.typesgen import Image as RootImage from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -28,26 +32,25 @@ def _hugging_face_text_to_image_wrapper( timer_start: Any, model: Any, response: Any, - http_response: Union[ClientResponse, Response], + http_response: Optional[Union[ClientResponse, Response]], args: Any, kwargs: Any, ) -> TextToImageOutput: - input_tokens = None + input_tokens = 0 + compute_time = time.perf_counter() - timer_start if http_response: - compute_time = http_response.headers.get("x-compute-time") + compute_time = http_response.headers.get("x-compute-time") or compute_time input_tokens = http_response.headers.get("x-compute-characters") - else: - compute_time = time.perf_counter() - timer_start if not input_tokens: encoder = tiktoken.get_encoding("cl100k_base") prompt = args[0] if len(args) > 0 else kwargs.get("prompt", "") input_tokens = len(encoder.encode(prompt)) if prompt != "" else 0 width, height = response.size scope3_row = ImpactRow( - model=Model(id=model), - input_tokens=input_tokens, + model_id=model, + input_tokens=int(input_tokens), task=Task.text_to_image, - output_images=["{width}x{height}".format(width=width, height=height)], + output_images=[RootImage(root=f"{width}x{height}")], request_duration_ms=float(compute_time) * 1000, managed_service_id=PROVIDER, ) @@ -66,7 +69,7 @@ def huggingface_text_to_image_wrapper( with requests_response_capture() as responses: response = wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_TEXT_TO_IMAGE_TASK @@ -84,7 +87,7 @@ async def huggingface_text_to_image_wrapper_async( with aiohttp_response_capture() as responses: response = await wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_TEXT_TO_IMAGE_TASK diff --git a/scope3ai/tracers/huggingface/text_to_speech.py b/scope3ai/tracers/huggingface/text_to_speech.py index 329f76c..2874fb2 100644 --- a/scope3ai/tracers/huggingface/text_to_speech.py +++ b/scope3ai/tracers/huggingface/text_to_speech.py @@ -4,11 +4,14 @@ import tiktoken from aiohttp import ClientResponse -from huggingface_hub import InferenceClient, AsyncInferenceClient # type: ignore[import-untyped] +from huggingface_hub import ( # type: ignore[import-untyped] + AsyncInferenceClient, + InferenceClient, +) from huggingface_hub import TextToSpeechOutput as _TextToSpeechOutput from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import ImpactRow, Scope3AIContext from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -28,24 +31,23 @@ def _hugging_face_text_to_speech_wrapper( timer_start: Any, model: Any, response: Any, - http_response: Union[ClientResponse, Response], + http_response: Optional[Union[ClientResponse, Response]], args: Any, kwargs: Any, ) -> TextToSpeechOutput: input_tokens = None + compute_time = time.perf_counter() - timer_start if http_response: - compute_time = http_response.headers.get("x-compute-time") + compute_time = http_response.headers.get("x-compute-time") or compute_time input_tokens = http_response.headers.get("x-compute-characters") - else: - compute_time = 
time.perf_counter() - timer_start if not input_tokens: encoder = tiktoken.get_encoding("cl100k_base") prompt = args[0] if len(args) > 0 else kwargs.get("text", "") input_tokens = len(encoder.encode(prompt)) if prompt != "" else 0 scope3_row = ImpactRow( - model=Model(id=model), - input_tokens=input_tokens, + model_id=model, + input_tokens=int(input_tokens), task=Task.text_to_speech, request_duration_ms=float(compute_time) * 1000, managed_service_id=PROVIDER, @@ -65,7 +67,7 @@ def huggingface_text_to_speech_wrapper( with requests_response_capture() as responses: response = wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_TEXT_TO_SPEECH_TASK @@ -83,7 +85,7 @@ async def huggingface_text_to_speech_wrapper_async( with aiohttp_response_capture() as responses: response = await wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_TEXT_TO_SPEECH_TASK diff --git a/scope3ai/tracers/huggingface/translation.py b/scope3ai/tracers/huggingface/translation.py index 5d9fa3e..ddae537 100644 --- a/scope3ai/tracers/huggingface/translation.py +++ b/scope3ai/tracers/huggingface/translation.py @@ -1,14 +1,17 @@ import time -from dataclasses import dataclass, asdict +from dataclasses import asdict, dataclass from typing import Any, Callable, Optional, Union import tiktoken from aiohttp import ClientResponse -from huggingface_hub import InferenceClient, AsyncInferenceClient # type: ignore[import-untyped] +from huggingface_hub import ( # type: ignore[import-untyped] + AsyncInferenceClient, + InferenceClient, +) from huggingface_hub import TranslationOutput as _TranslationOutput from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import ImpactRow, Scope3AIContext from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -28,25 +31,24 @@ def _hugging_face_translation_wrapper( timer_start: Any, model: Any, response: Any, - http_response: Union[ClientResponse, Response], + http_response: Optional[Union[ClientResponse, Response]], args: Any, kwargs: Any, ) -> TranslationOutput: encoder = tiktoken.get_encoding("cl100k_base") - input_tokens = None + input_tokens = 0 + compute_time = time.perf_counter() - timer_start if http_response: - compute_time = http_response.headers.get("x-compute-time") + compute_time = http_response.headers.get("x-compute-time") or compute_time input_tokens = http_response.headers.get("x-compute-characters") - else: - compute_time = time.perf_counter() - timer_start if not input_tokens: prompt = args[0] if len(args) > 0 else kwargs.get("text", "") input_tokens = len(encoder.encode(prompt)) if prompt != "" else 0 output_tokens = len(encoder.encode(response.translation_text)) scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, task=Task.translation, - input_tokens=input_tokens, + input_tokens=int(input_tokens), output_tokens=output_tokens, request_duration_ms=float(compute_time) * 1000, managed_service_id=PROVIDER, @@ -66,7 +68,7 @@ async def huggingface_translation_wrapper_async_non_stream( with aiohttp_response_capture() as responses: response = await wrapped(*args, **kwargs) http_responses = responses.get() - if 
len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_TRANSLATION_TASK @@ -84,7 +86,7 @@ def huggingface_translation_wrapper_non_stream( with requests_response_capture() as responses: response = wrapped(*args, **kwargs) http_responses = responses.get() - if len(http_responses) > 0: + if http_responses: http_response = http_responses[-1] model = kwargs.get("model") or instance.get_recommended_model( HUGGING_FACE_TRANSLATION_TASK diff --git a/scope3ai/tracers/huggingface/vision/image_classification.py b/scope3ai/tracers/huggingface/vision/image_classification.py index e4a40c1..b861a05 100644 --- a/scope3ai/tracers/huggingface/vision/image_classification.py +++ b/scope3ai/tracers/huggingface/vision/image_classification.py @@ -11,7 +11,7 @@ ) # type: ignore[import-untyped] from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import Scope3AIContext, ImpactRow from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -54,7 +54,7 @@ def _hugging_face_image_classification_wrapper( except Exception: pass scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=input_tokens, task=Task.image_classification, output_images=[], # No images to output in classification diff --git a/scope3ai/tracers/huggingface/vision/image_segmentation.py b/scope3ai/tracers/huggingface/vision/image_segmentation.py index b3c9777..c6e5534 100644 --- a/scope3ai/tracers/huggingface/vision/image_segmentation.py +++ b/scope3ai/tracers/huggingface/vision/image_segmentation.py @@ -12,7 +12,7 @@ ) # type: ignore[import-untyped] from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import Scope3AIContext, ImpactRow from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -56,7 +56,7 @@ def _hugging_face_image_segmentation_wrapper( except Exception: pass scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=input_tokens, task=Task.image_segmentation, request_duration_ms=float(compute_time) * 1000, diff --git a/scope3ai/tracers/huggingface/vision/object_detection.py b/scope3ai/tracers/huggingface/vision/object_detection.py index 71585a0..ffb453e 100644 --- a/scope3ai/tracers/huggingface/vision/object_detection.py +++ b/scope3ai/tracers/huggingface/vision/object_detection.py @@ -12,7 +12,7 @@ ) # type: ignore[import-untyped] from requests import Response -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import Scope3AIContext, ImpactRow from scope3ai.api.typesgen import Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -55,7 +55,7 @@ def _hugging_face_object_detection_wrapper( except Exception: pass scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=input_tokens, task=Task.object_detection, request_duration_ms=float(compute_time) * 1000, diff --git a/scope3ai/tracers/litellm/chat.py b/scope3ai/tracers/litellm/chat.py index d93de9f..3995829 100644 --- a/scope3ai/tracers/litellm/chat.py +++ b/scope3ai/tracers/litellm/chat.py @@ -6,7 +6,7 @@ from litellm.utils import CustomStreamWrapper from scope3ai import Scope3AI -from scope3ai.api.types import Scope3AIContext, Model, ImpactRow +from scope3ai.api.types import Scope3AIContext, ImpactRow 
from scope3ai.constants import PROVIDERS PROVIDER = PROVIDERS.LITELLM.value @@ -46,7 +46,7 @@ def litellm_chat_wrapper_stream( # type: ignore[misc] model = chunk.model if model is not None: scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, output_tokens=token_count, request_duration_ms=float(request_latency) * 1000, managed_service_id=PROVIDER, @@ -73,7 +73,7 @@ def litellm_chat_wrapper_non_stream( if model is None: return response scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.total_tokens, request_duration_ms=float(request_latency) * 1000, @@ -108,7 +108,7 @@ async def litellm_async_chat_wrapper_base( if model is None: return response scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.total_tokens, request_duration_ms=float(request_latency) * 1000, @@ -138,7 +138,7 @@ async def litellm_async_chat_wrapper_stream( # type: ignore[misc] model = chunk.model if model is not None: scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, output_tokens=token_count, request_duration_ms=float(request_latency) * 1000, managed_service_id=PROVIDER, diff --git a/scope3ai/tracers/mistralai/chat.py b/scope3ai/tracers/mistralai/chat.py index 05b7fc4..8c19651 100644 --- a/scope3ai/tracers/mistralai/chat.py +++ b/scope3ai/tracers/mistralai/chat.py @@ -9,7 +9,7 @@ from scope3ai import Scope3AI from scope3ai.api.types import Scope3AIContext -from scope3ai.api.typesgen import ImpactRow, Model +from scope3ai.api.typesgen import ImpactRow from scope3ai.constants import PROVIDERS PROVIDER = PROVIDERS.MISTRALAI.value @@ -33,7 +33,7 @@ def mistralai_v1_chat_wrapper( response = wrapped(*args, **kwargs) request_latency = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=response.model), + model_id=response.model, input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.completion_tokens, request_duration_ms=request_latency * 1000, @@ -61,7 +61,7 @@ def mistralai_v1_chat_wrapper_stream( if chunk.data: request_latency = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=token_count, output_tokens=chunk.data.usage.completion_tokens if chunk.data.usage @@ -86,7 +86,7 @@ async def mistralai_v1_async_chat_wrapper( response = await wrapped(*args, **kwargs) request_latency = time.perf_counter() - timer_start scope3_row = ImpactRow( - model=Model(id=response.model), + model_id=response.model, input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.completion_tokens, request_duration_ms=request_latency * 1000, @@ -108,7 +108,7 @@ async def _generator( request_latency = time.perf_counter() - timer_start model_name = chunk.data.model scope3_row = ImpactRow( - model=Model(id=model_name), + model_id=model_name, input_tokens=token_count, output_tokens=chunk.data.usage.completion_tokens if chunk.data.usage diff --git a/scope3ai/tracers/openai/chat.py b/scope3ai/tracers/openai/chat.py index 58fd43c..2730741 100644 --- a/scope3ai/tracers/openai/chat.py +++ b/scope3ai/tracers/openai/chat.py @@ -9,7 +9,8 @@ from openai.types.chat import ChatCompletion as _ChatCompletion from openai.types.chat import ChatCompletionChunk as _ChatCompletionChunk -from scope3ai.api.types import ImpactRow, Model, Scope3AIContext +from scope3ai.api.types import ImpactRow, Scope3AIContext +from scope3ai.api.typesgen import Image 
as RootImage from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -38,12 +39,12 @@ def _openai_aggregate_multimodal_image(content: dict, row: ImpactRow) -> None: image_data = BytesIO(base64.b64decode(data)) image = Image.open(image_data) width, height = image.size - size = f"{width}x{height}" + size = RootImage(root=f"{width}x{height}") if row.input_images is None: - row.input_images = size + row.input_images = [size] else: - row.input_images += f",{size}" + row.input_images.append(size) else: # TODO: not supported yet. @@ -94,8 +95,8 @@ def _openai_chat_wrapper( model_used = response.model scope3_row = ImpactRow( - model=Model(id=model_requested), - model_used=Model(id=model_used), + model_id=model_requested, + model_used_id=model_used, input_tokens=response.usage.prompt_tokens, output_tokens=response.usage.completion_tokens, request_duration_ms=request_latency * 1000, @@ -147,8 +148,8 @@ def openai_chat_wrapper_stream( model_used = chunk.model scope3_row = ImpactRow( - model=Model(id=model_requested), - model_used=Model(id=model_used), + model_id=model_requested, + model_used_id=model_used, input_tokens=chunk.usage.prompt_tokens, output_tokens=chunk.usage.completion_tokens, request_duration_ms=request_latency @@ -198,8 +199,8 @@ async def openai_async_chat_wrapper_stream( model_used = chunk.model scope3_row = ImpactRow( - model=Model(id=model_requested), - model_used=Model(id=model_used), + model_id=model_requested, + model_used_id=model_used, input_tokens=chunk.usage.prompt_tokens, output_tokens=chunk.usage.completion_tokens, request_duration_ms=request_latency diff --git a/scope3ai/tracers/openai/speech_to_text.py b/scope3ai/tracers/openai/speech_to_text.py index 91debb1..db9b5ab 100644 --- a/scope3ai/tracers/openai/speech_to_text.py +++ b/scope3ai/tracers/openai/speech_to_text.py @@ -9,7 +9,7 @@ TranscriptionVerbose as _TranscriptionVerbose, ) -from scope3ai.api.types import ImpactRow, Model, Scope3AIContext, Task +from scope3ai.api.types import ImpactRow, Scope3AIContext, Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -51,8 +51,8 @@ def _openai_speech_to_text_wrapper( options["input_audio_seconds"] = int(duration) scope3_row = ImpactRow( - model=Model(id=model), - provider=PROVIDER, + model_id=model, + managed_service_id=PROVIDER, output_tokens=output_tokens, request_duration_ms=request_latency, task=Task.speech_to_text, diff --git a/scope3ai/tracers/openai/text_to_image.py b/scope3ai/tracers/openai/text_to_image.py index 98bac1f..132b791 100644 --- a/scope3ai/tracers/openai/text_to_image.py +++ b/scope3ai/tracers/openai/text_to_image.py @@ -4,7 +4,8 @@ from openai.resources.images import AsyncImages, Images from openai.types.images_response import ImagesResponse as _ImageResponse -from scope3ai.api.types import ImpactRow, Model, Scope3AIContext, Task +from scope3ai.api.types import ImpactRow, Scope3AIContext, Task +from scope3ai.api.typesgen import Image as RootImage from scope3ai.lib import Scope3AI PROVIDER = "openai" @@ -21,11 +22,11 @@ def _openai_image_wrapper( response: _ImageResponse, request_latency: float, **kwargs: Any ) -> ImageResponse: model = kwargs.get("model", DEFAULT_MODEL) - size = kwargs.get("size", DEFAULT_SIZE) + size = RootImage(root=kwargs.get("size", DEFAULT_SIZE)) n = kwargs.get("n", DEFAULT_N) scope3_row = ImpactRow( - model=Model(id=model), + model_id=model, task=Task.text_to_image, output_images=[size] * n, request_duration_ms=request_latency * 1000, diff --git 
a/scope3ai/tracers/openai/text_to_speech.py b/scope3ai/tracers/openai/text_to_speech.py index 07471ed..f099bbc 100644 --- a/scope3ai/tracers/openai/text_to_speech.py +++ b/scope3ai/tracers/openai/text_to_speech.py @@ -5,7 +5,7 @@ import tiktoken from openai.resources.audio.speech import AsyncSpeech, Speech, _legacy_response -from scope3ai.api.types import ImpactRow, Model, Scope3AIContext, Task +from scope3ai.api.types import ImpactRow, Scope3AIContext, Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -41,11 +41,11 @@ def _openai_text_to_speech_submit( input_tokens = len(encoder.encode(kwargs["input"])) scope3_row = ImpactRow( - model=Model(id=model_requested), + model_id=model_requested, input_tokens=input_tokens, request_duration_ms=request_latency, - provider=PROVIDER, - audio_output_seconds=duration, + managed_service_id=PROVIDER, + output_audio_seconds=duration, task=Task.text_to_speech, ) diff --git a/scope3ai/tracers/openai/translation.py b/scope3ai/tracers/openai/translation.py index 5e7521a..17ee53b 100644 --- a/scope3ai/tracers/openai/translation.py +++ b/scope3ai/tracers/openai/translation.py @@ -9,7 +9,7 @@ TranslationVerbose as _TranslationVerbose, ) -from scope3ai.api.types import ImpactRow, Model, Scope3AIContext, Task +from scope3ai.api.types import ImpactRow, Scope3AIContext, Task from scope3ai.constants import PROVIDERS from scope3ai.lib import Scope3AI @@ -51,8 +51,8 @@ def _openai_translation_wrapper( options["input_audio_seconds"] = int(duration) scope3_row = ImpactRow( - model=Model(id=model), - provider=PROVIDER, + model_id=model, + managed_service_id=PROVIDER, output_tokens=output_tokens, request_duration_ms=request_latency, task=Task.translation, diff --git a/tests/api-mocks/aiapi.yaml b/tests/api-mocks/aiapi.yaml index fb41017..cb32c7a 100644 --- a/tests/api-mocks/aiapi.yaml +++ b/tests/api-mocks/aiapi.yaml @@ -8,7 +8,7 @@ paths: /status: get: servers: - - url: http://127.0.0.1:4010 + - url: https://aiapi.scope3.com description: API server operationId: status responses: @@ -58,7 +58,7 @@ paths: /model: get: servers: - - url: http://127.0.0.1:4010 + - url: https://aiapi.scope3.com description: API server security: - bearerAuth: [] @@ -118,24 +118,46 @@ paths: /node: get: servers: - - url: http://127.0.0.1:4010 + - url: https://aiapi.scope3.com description: API server security: - bearerAuth: [] - summary: List nodes + summary: List nodes (both global and custom) operationId: listNodes parameters: - in: query name: service schema: - $ref: "#/components/schemas/ManagedServiceProvider" - description: Filter models by managed service + type: string + description: Filter nodes by managed service provider required: false - in: query name: cloud schema: - $ref: "#/components/schemas/CloudProvider" - description: Filter models by cloud provider + type: string + description: Filter nodes by cloud provider + required: false + - in: query + name: custom + schema: + type: boolean + description: | + Filter by node type: + - true: Show only custom nodes + - false: Show only global nodes + - Not provided: Show all nodes + required: false + - in: query + name: gpu + schema: + type: string + description: Filter nodes by GPU + required: false + - in: query + name: instance + schema: + type: string + description: Filter nodes by cloud instance type required: false responses: "200": @@ -144,14 +166,42 @@ paths: application/json: schema: $ref: "#/components/schemas/NodeResponse" - "406": - description: Not acceptable response + "401": + description: 
Unauthorized content: application/json: schema: $ref: "#/components/schemas/Error" - "429": - description: Too many requests + "403": + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + post: + servers: + - url: https://aiapi.scope3.com + description: API server + security: + - bearerAuth: [] + summary: Create a custom node + operationId: createNode + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/NodeCreateRequest" + responses: + "201": + description: Node created successfully + content: + application/json: + schema: + $ref: "#/components/schemas/Node" + "400": + description: Invalid request format content: application/json: schema: @@ -168,14 +218,125 @@ paths: application/json: schema: $ref: "#/components/schemas/Error" - "415": - description: Unsupported media type + "409": + description: Node ID already exists content: application/json: schema: $ref: "#/components/schemas/Error" - default: - description: Unexpected error + + /node/{nodeId}: + parameters: + - name: nodeId + in: path + required: true + schema: + type: string + get: + servers: + - url: https://aiapi.scope3.com + description: API server + security: + - bearerAuth: [] + summary: Get a specific node (global or custom) + operationId: getNode + responses: + "200": + description: Node retrieved successfully + content: + application/json: + schema: + $ref: "#/components/schemas/Node" + "401": + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "403": + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "404": + description: Node not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + put: + servers: + - url: https://aiapi.scope3.com + description: API server + security: + - bearerAuth: [] + summary: Update a node (custom nodes only, unless admin) + operationId: updateNode + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/NodeUpdateRequest" + responses: + "200": + description: Node updated successfully + content: + application/json: + schema: + $ref: "#/components/schemas/Node" + "400": + description: Invalid request format + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "401": + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "403": + description: Forbidden - Cannot modify global nodes without admin privileges + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "404": + description: Node not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + delete: + servers: + - url: https://aiapi.scope3.com + description: API server + security: + - bearerAuth: [] + summary: Delete a node (custom nodes only, unless admin) + operationId: deleteNode + responses: + "204": + description: Node deleted successfully + "401": + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "403": + description: Forbidden - Cannot delete global nodes without admin privileges + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + "404": + description: Node not found content: application/json: schema: @@ -184,7 +345,7 @@ paths: /gpu: get: servers: - - url: http://127.0.0.1:4010 + - url: https://aiapi.scope3.com description: API server 
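Beyond the string-typed `listNodes` filters, the spec now defines full CRUD for custom nodes, while the client in this patch only wraps listing. A quick way to exercise the new endpoints is plain HTTP; a sketch with `httpx` (the env var name and payload values are assumptions, the base URL, required fields, and status codes come from the spec):

```python
import os

import httpx

API_URL = "https://aiapi.scope3.com"
headers = {"Authorization": f"Bearer {os.environ['SCOPE3AI_API_KEY']}"}  # assumed env var

# List only custom nodes backed by a100_40gb GPUs.
nodes = httpx.get(
    f"{API_URL}/node",
    params={"custom": True, "gpu": "a100_40gb"},
    headers=headers,
)
nodes.raise_for_status()

# Create a custom node; id, gpu_id, gpu_count and cpu_count are the required fields.
created = httpx.post(
    f"{API_URL}/node",
    json={"id": "my-custom-node-1", "gpu_id": "a100_40gb", "gpu_count": 8, "cpu_count": 2},
    headers=headers,
)
assert created.status_code == 201  # 409 if the node ID already exists
```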
security: - bearerAuth: [] @@ -237,7 +398,7 @@ paths: /v1/impact: post: servers: - - url: http://127.0.0.1:4010 + - url: https://aiapi.scope3.com description: API server security: - bearerAuth: [] @@ -248,7 +409,7 @@ paths: name: debug schema: type: boolean - description: Filter models by family + description: Return debug information required: false requestBody: required: true @@ -300,41 +461,10 @@ paths: schema: $ref: "#/components/schemas/Error" - /v1/log_json: - post: - servers: - - url: http://127.0.0.1:4010 - description: API server - security: - - bearerAuth: [] - summary: Log requests in JSON format - operationId: logImpactJSON - requestBody: - required: true - content: - application/json: - schema: - $ref: "#/components/schemas/ImpactLogRequest" - responses: - "200": - description: Impact metrics retrieved successfully - "406": - description: Not acceptable response - "429": - description: Too many requests - "401": - description: Unauthorized - "403": - description: Forbidden - "415": - description: Unsupported media type - default: - description: Unexpected error - /: post: servers: - - url: http://127.0.0.1:4010 + - url: https://aiapi.scope3.com description: API server security: - bearerAuth: [] @@ -397,6 +527,7 @@ components: type: http scheme: bearer bearerFormat: JWT + schemas: StatusResponse: title: Status Response @@ -436,6 +567,158 @@ components: items: $ref: "#/components/schemas/Node" + NodeCreateRequest: + type: object + required: + - id + - gpu_id + - gpu_count + - cpu_count + description: | + Create a new node. + Note on permissions: + - cloud_instance_id and managed_service_id can only be set by admins or users who own those resources + - Custom nodes are visible only to their owners + - Global nodes are visible to all users + - Admins can see and manage all nodes + properties: + id: + type: string + example: my-custom-node-1 + pattern: ^[a-z0-9-]+$ + minLength: 3 + maxLength: 64 + cloud_id: + type: string + example: aws + cloud_instance_id: + type: string + example: a2-highgpu-1g + managed_service_id: + type: string + example: aws-bedrock + gpu_id: + type: string + example: a100_40gb + gpu_count: + type: integer + minimum: 0 + maximum: 10000 + example: 8 + x-go-type: int64 + cpu_count: + type: integer + minimum: 1 + maximum: 10000 + example: 2 + x-go-type: int64 + idle_power_w_ex_gpu: + type: number + example: 100 + minimum: 0 + maximum: 10000 + format: float + x-go-type: float64 + average_utilization_rate: + type: number + example: 0.8 + format: float + x-go-type: float64 + minimum: 0 + maximum: 1 + embodied_emissions_kgco2e_ex_gpu: + type: number + example: 2500 + minimum: 0 + maximum: 100000 + format: float + x-go-type: float64 + embodied_water_l_ex_gpu: + type: number + example: 2500 + minimum: 0 + maximum: 100000 + format: float + x-go-type: float64 + use_life_years: + type: number + example: 5 + minimum: 1 + maximum: 30 + format: float + x-go-type: float64 + + NodeUpdateRequest: + type: object + description: | + Update an existing node. 
+ - Id can not be updated + Note on permissions: + - cloud_instance_id and managed_service_id can only be set by admins or users who own those resources + - Custom nodes are visible only to their owners + - Global nodes are visible to all users + - Admins can see and manage all nodes + properties: + cloud_id: + type: string + example: aws + cloud_instance_id: + type: string + example: a2-highgpu-1g + managed_service_id: + type: string + example: aws-bedrock + gpu_id: + type: string + example: a100_40gb + gpu_count: + type: integer + minimum: 0 + maximum: 10000 + example: 8 + x-go-type: int64 + cpu_count: + type: integer + minimum: 1 + maximum: 10000 + example: 2 + x-go-type: int64 + idle_power_w_ex_gpu: + type: number + example: 100 + minimum: 0 + maximum: 10000 + format: float + x-go-type: float64 + average_utilization_rate: + type: number + example: 0.8 + format: float + x-go-type: float64 + minimum: 0 + maximum: 1 + embodied_emissions_kgco2e_ex_gpu: + type: number + example: 2500 + minimum: 0 + maximum: 100000 + format: float + x-go-type: float64 + embodied_water_l_ex_gpu: + type: number + example: 2500 + minimum: 0 + maximum: 100000 + format: float + x-go-type: float64 + use_life_years: + type: number + example: 5 + minimum: 1 + maximum: 30 + format: float + x-go-type: float64 + GPUResponse: title: GPU Response type: object @@ -484,7 +767,7 @@ components: country STRING, region STRING, task STRING, input_tokens INT64, output_tokens INT64, input_images STRING, output_images STRING, output_video_resolution INT64, - output_video_frames INT64, input_audio_seconds INT64, output_audio_seconds INT64, input_steps INT64 + output_video_frames INT64, input_audio_seconds INT64, input_steps INT64 type: array minItems: 17 maxItems: 21 @@ -493,6 +776,7 @@ components: oneOf: - type: string - type: integer + x-go-type: StringOrInt64 example: [ "gpt-4-turbo", @@ -531,8 +815,8 @@ components: description: Error message for BigQuery example: "Invalid request format: missing required field 'calls'" - ImpactLogRequest: - title: Log impact data + ImpactRequest: + title: Impact Request type: object additionalProperties: false required: @@ -542,31 +826,56 @@ components: type: array maxItems: 1000 items: - $ref: "#/components/schemas/ImpactLogRow" + $ref: "#/components/schemas/ImpactRow" - ImpactLogRow: + ImpactRow: type: object additionalProperties: false properties: - start_time_utc: + utc_datetime: + description: The start time of the request in UTC type: string format: date-time - description: The start time of the inference - example: "2024-10-01T00:00:00Z" + example: "2022-01-01T00:00:00Z" request_duration_ms: type: number format: float + x-go-type: float64 description: The time the request took (as measured by client or proxy) example: 283 processing_duration_ms: type: number format: float + x-go-type: float64 description: The time taken in processing the request (as measured at execution) example: 238 + request_cost: + type: number + format: float + x-go-type: float64 + description: The cost to execute this request + example: 0.18 + currency: + type: string + description: The currency for the cost data + example: "USD" integration_source: type: string description: The integration used to source the data example: "litellm" + environment: + type: string + description: Environment (prod/production indicates production) + example: "staging" + session_id: + type: string + description: The ID of the session (multiple requests) + trace_id: + type: string + description: The trace ID of the request (multiple 
requests in one task) + request_id: + type: string + description: The unique identifier of this request client_id: type: string description: The client to attribute this call to @@ -576,162 +885,46 @@ components: application_id: type: string description: The application to attribute this call to - session_id: - type: string - description: The ID of the session (multiple requests) - request_id: - type: string - description: The unique identifier of this request - environment: - type: string - description: Environment (prod/production indicates production) - example: "staging" model_id: type: string description: The ID of the model requested example: "llama_31_8b" - model_id_used: + model_family: type: string - description: The ID of the model that did the inference - example: "llama_31_8b_0125" + description: The family of the model + example: "llama" model_name: type: string description: The name of the model example: "LLaMa v3.1 8B" - model_family: - type: string - description: The family of the model - example: "llama" model_hugging_face_path: type: string description: The Hugging Face path of the model example: "meta/llama31_8b" - cloud_id: + model_used_id: type: string - description: The ID of the cloud - example: "aws" + description: The ID of the model that did the inference + example: "llama_31_8b_0125" cloud_region: type: string description: The region of cloud hosting - example: "us-central1" - cloud_instance_id: - type: string - description: The instance type in the cloud - example: "xl-4g-8a100" + example: us-central1 managed_service_id: type: string description: The ID of a managed service provider - example: "aws-bedrock" - node_id: - type: string - description: The ID of a proprietary node - example: "h200-2024-build" - node_country: - type: string - pattern: "^[A-Z]{2}$" - minLength: 2 - maxLength: 2 - description: The country where the servers are hosted - example: "US" - node_region: - type: string - pattern: "^[A-Z]{2}$" - minLength: 2 - maxLength: 2 - description: The region where the servers are hosted - example: "CA" - task: - $ref: "#/components/schemas/Task" - description: The task the AI is performing - example: "text-generation" - input_tokens: - type: integer - description: the number of input tokens - minimum: 0 - maximum: 100000000 - example: 1033 - output_tokens: - type: integer - description: the number of output tokens - minimum: 0 - maximum: 100000000 - example: 2300 - input_audio_seconds: - type: integer - description: the duration of audio input in seconds - minimum: 0 - maximum: 100000 - example: 60 - output_audio_seconds: - type: integer - description: the duration of audio output in seconds - minimum: 0 - maximum: 100000 - example: 60 - input_images: - type: string - description: a comma delimited list of image sizes - example: "512x512,1024x1024" - input_steps: - type: integer - description: the number of steps in the model - minimum: 1 - maximum: 10000 - example: 50 - output_images: - type: string - description: a comma delimited list of output sizes - example: "512x512,1024x1024" - output_video_frames: - type: integer - description: the number of video frames (frame rate x duration) - minimum: 0 - maximum: 100000000 - example: 60 - output_video_resolution: - type: integer - description: the resolution of the video in number of lines (for instance, 1080 for 1080p) - example: 1080 - request_cost: - type: number - format: float - description: the cost of this request - currency: + example: aws-bedrock + cloud_id: type: string - description: the currency 
for cost data - - ImpactRequest: - title: Impact Request - type: object - additionalProperties: false - required: - - rows - properties: - rows: - type: array - maxItems: 1000 - items: - $ref: "#/components/schemas/ImpactRow" - - ImpactRow: - type: object - additionalProperties: false - required: - - model - properties: - utc_datetime: - description: The start time of the request in UTC + description: The ID of the cloud + example: aws + cloud_instance_id: type: string - format: date-time - example: "2022-01-01T00:00:00Z" - model: - $ref: "#/components/schemas/Model" - cloud_region: + description: The instance type in the cloud + example: xl-4g-8a100 + node_id: type: string - description: The region of cloud hosting - example: us-central1 - node: - $ref: "#/components/schemas/Node" + description: The ID of a custom or global node + example: h200-2024-build country: $ref: "#/components/schemas/CountryCode" region: @@ -741,24 +934,21 @@ components: example: text-generation input_tokens: type: integer + x-go-type: int64 description: the number of input (or prompt) tokens minimum: 0 maximum: 100000000 example: 128 input_audio_seconds: type: integer + x-go-type: int64 description: the duration of audio input in seconds minimum: 0 maximum: 100000 example: 60 - output_audio_seconds: - type: integer - description: the duration of audio output in seconds - minimum: 0 - maximum: 100000 - example: 60 output_tokens: type: integer + x-go-type: int64 description: the number of output (or completion) tokens example: 128 minimum: 0 @@ -770,6 +960,7 @@ components: $ref: "#/components/schemas/Image" input_steps: type: integer + x-go-type: int64 description: the number of steps to use in the model minimum: 1 maximum: 10000 @@ -780,35 +971,39 @@ components: maxItems: 100 items: $ref: "#/components/schemas/Image" + output_audio_seconds: + type: number + format: float + x-go-type: float64 + description: the duration of audio output in seconds + minimum: 0 + maximum: 100000 + example: 60 + output_audio_tokens: + type: integer + x-go-type: int64 + description: the number of audio tokens in the output + minimum: 0 + maximum: 100000000 + example: 2300 output_video_frames: type: integer + x-go-type: int64 description: the number of video frames (frame rate x duration) minimum: 0 maximum: 100000000 example: 60 output_video_resolution: type: integer + x-go-type: int64 description: the resolution of the video in number of lines (for instance, 1080 for 1080p) example: 1080 - # field proposed to track information - request_duration_ms: - type: number - format: float - description: The time the request took (as measured by client or proxy) - example: 283 - nullable: true - managed_service_id: - type: string - description: The ID of a managed service provider - example: "aws-bedrock" - nullable: true - model_used: - $ref: "#/components/schemas/Model" - Model: title: Model type: object + required: + - id additionalProperties: false properties: id: @@ -830,13 +1025,16 @@ components: type: number example: 175 format: float + x-go-type: float64 number_of_experts: type: integer + x-go-type: int64 example: 7 params_per_expert_billions: type: number example: 8 format: float + x-go-type: float64 tensor_parallelism: type: integer example: 1 @@ -850,30 +1048,37 @@ components: type: number example: 1013.1 format: float + x-go-type: float64 training_usage_emissions_kgco2e: type: number example: 1013.1 format: float + x-go-type: float64 training_usage_water_l: type: number example: 1013.1 format: float + x-go-type: float64 
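With `ImpactLogRow` gone, its logging metadata now lives directly on `ImpactRow`. A sketch of a fully annotated row built from the documented fields (values are illustrative, mostly lifted from the schema examples):

```python
from scope3ai.api.types import ImpactRow

row = ImpactRow(
    model_id="llama_31_8b",
    model_used_id="llama_31_8b_0125",
    managed_service_id="aws-bedrock",
    cloud_id="aws",
    cloud_region="us-central1",
    node_id="h200-2024-build",
    environment="staging",
    integration_source="litellm",
    input_tokens=1033,
    output_tokens=2300,
    request_duration_ms=283.0,
    request_cost=0.18,
    currency="USD",
)
```

The schema keeps `additionalProperties: false`, so unknown keys are rejected rather than silently accepted.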
training_embodied_emissions_kgco2e: type: number example: 11013.1 format: float + x-go-type: float64 training_embodied_water_l: type: number example: 11013.1 format: float + x-go-type: float64 estimated_use_life_days: type: number example: 1013.1 format: float + x-go-type: float64 estimated_requests_per_day: type: number example: 1013.1 format: float + x-go-type: float64 fine_tuned_from_model_id: type: string example: llama_31_8b @@ -895,14 +1100,17 @@ components: type: number example: 0.13 format: float + x-go-type: float64 total_gco2e: type: number example: 0.81 format: float + x-go-type: float64 total_mlh2o: type: number example: 1.32 format: float + x-go-type: float64 has_errors: type: boolean example: false @@ -940,22 +1148,27 @@ components: type: number example: 0.13 format: float + x-go-type: float64 usage_emissions_gco2e: type: number example: 0.81 format: float + x-go-type: float64 usage_water_ml: type: number example: 1.32 format: float + x-go-type: float64 embodied_emissions_gco2e: type: number example: 0.81 format: float + x-go-type: float64 embodied_water_ml: type: number example: 1.32 format: float + x-go-type: float64 DebugInfo: type: object @@ -986,8 +1199,10 @@ components: duration_ms: type: number format: float + x-go-type: float64 inferences: type: integer + x-go-type: int64 Error: type: object @@ -1022,73 +1237,42 @@ components: type: number example: 475 format: float + x-go-type: float64 minimum: 0 maximum: 2000 Node: - type: object - additionalProperties: false - properties: - id: - type: string - example: base-node-xl - cloud_id: - type: string - example: aws - cloud_instance_id: - type: string - example: a2-highgpu-1g - managed_service_id: - type: string - example: aws-bedrock - gpu_id: - type: string - example: a100_40gb - gpu: - $ref: "#/components/schemas/GPU" - gpu_count: - type: integer - minimum: 0 - maximum: 10000 - example: 8 - cpu_count: - type: integer - minimum: 1 - maximum: 10000 - example: 2 - idle_power_w_ex_gpu: - type: number - example: 100 - minimum: 0 - maximum: 10000 - format: float - average_utilization_rate: - type: number - example: 0.8 - format: float - minimum: 0 - maximum: 1 - embodied_emissions_kgco2e_ex_gpu: - type: number - example: 2500 - minimum: 0 - maximum: 100000 - format: float - embodied_water_l_ex_gpu: - type: number - example: 2500 - minimum: 0 - maximum: 100000 - format: float - use_life_years: - type: number - example: 5 - minimum: 1 - maximum: 30 - format: float + allOf: + - $ref: "#/components/schemas/NodeCreateRequest" + - type: object + required: + - created_at + - updated_at + properties: + customer_id: + type: int + x-go-type: int64 + description: ID of the customer who owns this node (visible to admins only) + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + created_by: + type: string + description: ID of the user who created the node (admin or owner only) GPU: type: object + required: + - id + - max_power_w + - embodied_emissions_kgco2e + - embodied_water_mlh2o + - performance_ratio_to_h200 + - ols_intercept + - ols_coefficient_gpu_count additionalProperties: false properties: name: @@ -1101,41 +1285,37 @@ components: type: number example: 700 format: float + x-go-type: float64 embodied_emissions_kgco2e: type: number example: 282.1 format: float + x-go-type: float64 embodied_water_mlh2o: type: number example: 181.1 format: float + x-go-type: float64 performance_ratio_to_h200: type: number example: 1.5 format: float - - ManagedServiceProvider: - type: string - enum: - - 
aws-bedrock - - azure-ml - - google-vertex - - ibm-watson - - hugging-face + x-go-type: float64 + ols_coefficient_gpu_count: + type: number + example: 11.4 + format: float + x-go-type: float64 + ols_intercept: + type: number + example: 11.4 + format: float + x-go-type: float64 Image: type: string example: "1024x1024" - pattern: '^\d{1,4}x\d{1,4}$' - - CloudProvider: - type: string - enum: - - aws - - azure - - gcp - - oracle - - ibm + pattern: ^(\d{1,4})x(\d{1,4})$ Task: type: string diff --git a/tests/test_openai_multimodal.py b/tests/test_openai_multimodal.py index bebb680..0af7e8c 100644 --- a/tests/test_openai_multimodal.py +++ b/tests/test_openai_multimodal.py @@ -30,6 +30,8 @@ def load_image_b64(path: Path) -> str: def test_openai_multimodal_vision(tracer_init): from openai import OpenAI + from scope3ai.api.typesgen import Image + client = OpenAI() response = client.chat.completions.create( model="gpt-4o-mini", @@ -56,7 +58,7 @@ def test_openai_multimodal_vision(tracer_init): assert response.scope3ai.request.input_tokens == 872 assert response.scope3ai.request.output_tokens == 57 - assert response.scope3ai.request.input_images == "1024x1024" + assert response.scope3ai.request.input_images == [Image(root="1024x1024")] assert response.scope3ai.impact is None @@ -64,6 +66,8 @@ def test_openai_multimodal_vision(tracer_init): def test_openai_multimodal_vision_2_images(tracer_init): from openai import OpenAI + from scope3ai.api.typesgen import Image + client = OpenAI() response = client.chat.completions.create( model="gpt-4o-mini", @@ -96,7 +100,10 @@ def test_openai_multimodal_vision_2_images(tracer_init): assert response.scope3ai.request.input_tokens == 34016 assert response.scope3ai.request.output_tokens == 47 - assert response.scope3ai.request.input_images == "512x512,1024x1024" + assert response.scope3ai.request.input_images == [ + Image(root="512x512"), + Image(root="1024x1024"), + ] assert response.scope3ai.impact is None diff --git a/tests/test_openai_tts_tracer.py b/tests/test_openai_tts_tracer.py index fb7dca6..1c88727 100644 --- a/tests/test_openai_tts_tracer.py +++ b/tests/test_openai_tts_tracer.py @@ -19,9 +19,9 @@ def test_openai_tts_wrapper(tracer_init, audio_format, model): assert response is not None assert response.scope3ai is not None - assert response.scope3ai.request.audio_output_seconds is not None - assert response.scope3ai.request.audio_output_seconds > 0.5 - assert response.scope3ai.request.audio_output_seconds < 3 + assert response.scope3ai.request.output_audio_seconds is not None + assert response.scope3ai.request.output_audio_seconds > 0.5 + assert response.scope3ai.request.output_audio_seconds < 3 @pytest.mark.vcr @@ -41,6 +41,6 @@ async def test_openai_tts_wrapper_async(tracer_init, audio_format, model): assert response is not None assert response.scope3ai is not None - assert response.scope3ai.request.audio_output_seconds is not None - assert response.scope3ai.request.audio_output_seconds > 0.5 - assert response.scope3ai.request.audio_output_seconds < 3 + assert response.scope3ai.request.output_audio_seconds is not None + assert response.scope3ai.request.output_audio_seconds > 0.5 + assert response.scope3ai.request.output_audio_seconds < 3 diff --git a/tests/test_tracer.py b/tests/test_tracer.py index fffc11a..e632afb 100644 --- a/tests/test_tracer.py +++ b/tests/test_tracer.py @@ -189,13 +189,13 @@ def test_tracer_context_nested(tracer_init): def test_tracer_submit_impact(tracer_init): - from scope3ai.api.types import ImpactRow, Model + from 
scope3ai.api.types import ImpactRow # pause the background worker tracer_init._ensure_worker() tracer_init._worker.pause() - impact = ImpactRow(model=Model(id="gpt_4o"), input_tokens=100, output_tokens=100) + impact = ImpactRow(model_id="gpt_4o", input_tokens=100, output_tokens=100) ctx = tracer_init.submit_impact(impact) assert ctx is not None @@ -209,9 +209,9 @@ def test_tracer_submit_impact(tracer_init): def test_tracer_submit_impact_sync(tracer_with_sync_init): - from scope3ai.api.types import ImpactRow, Model + from scope3ai.api.types import ImpactRow - impact = ImpactRow(model=Model(id="gpt_4o"), input_tokens=100, output_tokens=100) + impact = ImpactRow(model_id="gpt_4o", input_tokens=100, output_tokens=100) ctx = tracer_with_sync_init.submit_impact(impact) assert ctx is not None @@ -220,13 +220,13 @@ def test_tracer_submit_impact_sync(tracer_with_sync_init): @pytest.mark.asyncio async def test_tracer_submit_impact_async(tracer_init): - from scope3ai.api.types import ImpactRow, Model + from scope3ai.api.types import ImpactRow # pause the background worker tracer_init._ensure_worker() tracer_init._worker.pause() - impact = ImpactRow(model=Model(id="gpt_4o"), input_tokens=100, output_tokens=100) + impact = ImpactRow(model_id="gpt_4o", input_tokens=100, output_tokens=100) ctx = await tracer_init.asubmit_impact(impact) assert ctx is not None @@ -240,9 +240,9 @@ async def test_tracer_submit_impact_async(tracer_init): @pytest.mark.asyncio async def test_tracer_submit_impact_sync_async(tracer_with_sync_init): - from scope3ai.api.types import ImpactRow, Model + from scope3ai.api.types import ImpactRow - impact = ImpactRow(model=Model(id="gpt_4o"), input_tokens=100, output_tokens=100) + impact = ImpactRow(model_id="gpt_4o", input_tokens=100, output_tokens=100) ctx = await tracer_with_sync_init.asubmit_impact(impact) assert ctx is not None diff --git a/tools/sync-api.py b/tools/sync-api.py new file mode 100644 index 0000000..7f9c943 --- /dev/null +++ b/tools/sync-api.py @@ -0,0 +1,108 @@ +import shutil +import tempfile +import subprocess +from pathlib import Path + + +def clone_repository(repo_url: str, clone_dir: Path) -> None: + try: + subprocess.run(["git", "clone", repo_url, str(clone_dir)], check=True) + print(f"Repository cloned successfully: {repo_url}") + except subprocess.CalledProcessError as e: + print(f"ERROR: Failed to clone repository: {e}") + + +def copy_file(src: Path, dst: Path) -> None: + try: + shutil.copy(src, dst) + print(f"File copied successfully from {src} to {dst}") + except IOError as e: + print(f"ERROR: Failed to copy file: {e}") + + +def patch_aiapi_yaml(filename: Path) -> None: + def remove_number_of_experts(text): + print("- Removing 'number_of_experts' from the ImpactRow") + lines = text.split("\n") + new_lines = [] + found = False + for line in lines: + if " - number_of_experts" in line: + found = True + else: + new_lines.append(line) + if not found: + raise ValueError("ERROR: 'number_of_experts' not found in the file.") + return "\n".join(new_lines) + + def fix_pattern(text): + print("- Fixing image pattern") + old_pattern = r"pattern: /^(\d{1,4})x(\d{1,4})$/" + new_pattern = r"pattern: ^(\d{1,4})x(\d{1,4})$" + + if old_pattern not in text: + raise ValueError( + r"ERROR: 'pattern: /^(\d{1,4})x(\d{1,4})$/' not found in the file." 
+            )
+
+        return text.replace(old_pattern, new_pattern)
+
+    try:
+        print(f"Patching aiapi.yaml: {filename}")
+        content = filename.read_text()
+        content = remove_number_of_experts(content)
+        content = fix_pattern(content)
+        filename.write_text(content)
+
+    except Exception as e:
+        print(f"ERROR: Failed to patch aiapi.yaml: {e}")
+
+
+def run_code_generation() -> None:
+    try:
+        subprocess.run(
+            [
+                "datamodel-codegen",
+                "--input",
+                "tests/api-mocks/aiapi.yaml",
+                "--input-file-type",
+                "openapi",
+                "--output",
+                "scope3ai/api/typesgen.py",
+                "--output-model-type",
+                "pydantic_v2.BaseModel",
+                "--use-schema-description",
+                # With a field like `x: Optional[str] = Field(None, ...)`, pyright
+                # reports x as a missing constructor argument even though it is
+                # optional. Annotated fields avoid this; see
+                # https://github.com/pydantic/pydantic/discussions/7379
+                "--use-annotated",
+            ],
+            check=True,
+        )
+        subprocess.run(["ruff", "format", "scope3ai/api/typesgen.py"], check=True)
+        print("Code generation and formatting completed successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"ERROR: Failed to run code generation or formatting: {e}")
+
+
+def main() -> None:
+    repo_url = "git@github.com:scope3data/aiapi"
+    dst_file = Path("tests/api-mocks/aiapi.yaml")
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        clone_dir = Path(tmpdirname)
+        src_file = clone_dir / "api/api.yaml"
+
+        try:
+            clone_repository(repo_url, clone_dir)
+            copy_file(src_file, dst_file)
+            patch_aiapi_yaml(dst_file)
+            run_code_generation()
+        except Exception as e:
+            print(f"ERROR: An error occurred: {e}")
+        # TemporaryDirectory removes clone_dir on exit; no manual rmtree needed.
+
+
+if __name__ == "__main__":
+    main()
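One detail of `run_code_generation` worth spelling out: `--use-annotated` exists to keep the generated models friendly to pyright. A simplified sketch of the two styles the generator can emit (these are not the real generated models):

```python
from typing import Optional

from pydantic import BaseModel, Field
from typing_extensions import Annotated


class FieldDefault(BaseModel):
    # Field(None, ...) as the default: pyright may flag `reason` as a missing
    # constructor argument even though it is optional at runtime.
    reason: Optional[str] = Field(None, max_length=64)


class AnnotatedDefault(BaseModel):
    # With --use-annotated, constraints move into Annotated and the default
    # stays a plain `= None`, which type checkers understand.
    reason: Annotated[Optional[str], Field(max_length=64)] = None


AnnotatedDefault()  # constructs cleanly with no arguments
```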