Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion scope3ai/tracers/huggingface/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def huggingface_chat_wrapper(
def huggingface_chat_wrapper_non_stream(
wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> ChatCompletionOutput:
timer_start = time.perf_counter()
http_response: Response | None = None
with requests_response_capture() as responses:
response = wrapped(*args, **kwargs)
Expand All @@ -48,7 +49,10 @@ def huggingface_chat_wrapper_non_stream(
model = (
instance.model or kwargs.get("model") or instance.get_recommended_model("chat")
)
compute_time = http_response.headers.get("x-compute-time")
if http_response:
compute_time = http_response.headers.get("x-compute-time")
else:
compute_time = time.perf_counter() - timer_start
scope3_row = ImpactRow(
model=Model(id=model),
input_tokens=response.usage.prompt_tokens,
Expand Down
100 changes: 100 additions & 0 deletions scope3ai/tracers/huggingface/image_to_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import time
from dataclasses import dataclass
from typing import Any, Callable, Optional, Union

import tiktoken
from PIL import Image
from aiohttp import ClientResponse
from huggingface_hub import ImageToImageOutput as _ImageToImageOutput
from huggingface_hub import InferenceClient, AsyncInferenceClient # type: ignore[import-untyped]
from requests import Response

from scope3ai.api.types import Scope3AIContext, Model, ImpactRow
from scope3ai.api.typesgen import Task
from scope3ai.constants import PROVIDERS
from scope3ai.lib import Scope3AI
from scope3ai.response_interceptor.aiohttp_interceptor import aiohttp_response_capture
from scope3ai.response_interceptor.requests_interceptor import requests_response_capture

PROVIDER = PROVIDERS.HUGGINGFACE_HUB.value


@dataclass
class ImageToImageOutput(_ImageToImageOutput):
    """Hugging Face ``ImageToImageOutput`` extended with Scope3AI impact metadata."""

    # Impact-tracing context attached by the wrapper after the inference call;
    # stays None until an impact row has been submitted.
    scope3ai: Optional[Scope3AIContext] = None


def _hugging_face_image_to_image_wrapper(
    timer_start: Any,
    model: Any,
    response: Any,
    http_response: Union[ClientResponse, Response],
    args: Any,
    kwargs: Any,
) -> ImageToImageOutput:
    """Build and submit a Scope3AI impact row for an image-to-image call.

    Args:
        timer_start: ``time.perf_counter()`` value taken before the request,
            used as a duration fallback when no HTTP timing header is available.
        model: Resolved model identifier for the inference call.
        response: The raw image-to-image result (expected to expose ``.size``,
            e.g. a PIL image — TODO confirm against huggingface_hub return type).
        http_response: Captured HTTP response (sync or async client), or None
            when interception did not record one.
        args: Positional args of the wrapped call — ``args[0]`` is the input
            image, ``args[1]`` the prompt, mirroring
            ``InferenceClient.image_to_image``.
        kwargs: Keyword args of the wrapped call.

    Returns:
        An ``ImageToImageOutput`` wrapping ``response`` with ``scope3ai`` set
        to the submitted impact context.
    """
    compute_time = None
    input_tokens = None
    if http_response:
        compute_time = http_response.headers.get("x-compute-time")
        input_tokens = http_response.headers.get("x-compute-characters")
    # Headers may be absent even when a response was captured: fall back to
    # the local timer and to tokenizing the prompt ourselves (same fallback
    # strategy as the text_to_image tracer).
    if compute_time is None:
        compute_time = time.perf_counter() - timer_start
    if not input_tokens:
        encoder = tiktoken.get_encoding("cl100k_base")
        prompt = args[1] if len(args) > 1 else kwargs.get("prompt", "")
        input_tokens = len(encoder.encode(prompt)) if prompt != "" else 0
    input_images = None
    try:
        # The input may be a path/file-like/bytes; best-effort size probe only.
        input_image = Image.open(args[0] if len(args) > 0 else kwargs["image"])
        input_width, input_height = input_image.size
        input_images = [
            ("{width}x{height}".format(width=input_width, height=input_height))
        ]
    except Exception:
        # Unreadable input (e.g. raw bytes stream already consumed) — record
        # the impact row without input image dimensions rather than failing.
        pass
    output_width, output_height = response.size
    scope3_row = ImpactRow(
        model=Model(id=model),
        # Header value is a string; normalize so ImpactRow always gets an int.
        input_tokens=int(input_tokens),
        task=Task.image_generation,
        request_duration_ms=float(compute_time) * 1000,
        managed_service_id=PROVIDER,
        output_images=[
            "{width}x{height}".format(width=output_width, height=output_height)
        ],
        input_images=input_images,
    )

    scope3_ctx = Scope3AI.get_instance().submit_impact(scope3_row)
    result = ImageToImageOutput(response)
    result.scope3ai = scope3_ctx
    return result


def huggingface_image_to_image_wrapper(
    wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> ImageToImageOutput:
    """Sync tracer for ``InferenceClient.image_to_image``.

    Times the wrapped call, captures the underlying ``requests`` response to
    read compute headers, resolves the model, and delegates impact recording
    to ``_hugging_face_image_to_image_wrapper``.
    """
    started_at = time.perf_counter()
    captured: Response | None = None
    with requests_response_capture() as responses:
        result = wrapped(*args, **kwargs)
        recorded = responses.get()
        if recorded:
            # Keep the most recent response — that is the inference request.
            captured = recorded[-1]
    model = kwargs.get("model") or instance.get_recommended_model("image-to-image")
    return _hugging_face_image_to_image_wrapper(
        started_at, model, result, captured, args, kwargs
    )


async def huggingface_image_to_image_wrapper_async(
    wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
) -> ImageToImageOutput:
    """Async tracer for ``AsyncInferenceClient.image_to_image``.

    Mirrors the sync wrapper: times the call, captures the underlying
    ``aiohttp`` response for compute headers, resolves the model, and hands
    off to ``_hugging_face_image_to_image_wrapper`` for impact recording.
    """
    started_at = time.perf_counter()
    captured: ClientResponse | None = None
    with aiohttp_response_capture() as responses:
        result = await wrapped(*args, **kwargs)
        recorded = responses.get()
        if recorded:
            # Keep the most recent response — that is the inference request.
            captured = recorded[-1]
    model = kwargs.get("model") or instance.get_recommended_model("image-to-image")
    return _hugging_face_image_to_image_wrapper(
        started_at, model, result, captured, args, kwargs
    )
20 changes: 20 additions & 0 deletions scope3ai/tracers/huggingface/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
huggingface_chat_wrapper,
huggingface_async_chat_wrapper,
)
from scope3ai.tracers.huggingface.image_to_image import (
huggingface_image_to_image_wrapper,
huggingface_image_to_image_wrapper_async,
)
from scope3ai.tracers.huggingface.speech_to_text import (
huggingface_automatic_recognition_output_wrapper,
)
Expand All @@ -13,6 +17,7 @@
)
from scope3ai.tracers.huggingface.text_to_speech import (
huggingface_text_to_speech_wrapper,
huggingface_text_to_speech_wrapper_async,
)
from scope3ai.tracers.huggingface.translation import (
huggingface_translation_wrapper_non_stream,
Expand Down Expand Up @@ -48,6 +53,11 @@ def __init__(self) -> None:
"name": "InferenceClient.text_to_speech",
"wrapper": huggingface_text_to_speech_wrapper,
},
{
"module": "huggingface_hub.inference._generated._async_client",
"name": "AsyncInferenceClient.text_to_speech",
"wrapper": huggingface_text_to_speech_wrapper_async,
},
{
"module": "huggingface_hub.inference._client",
"name": "InferenceClient.automatic_speech_recognition",
Expand All @@ -63,6 +73,16 @@ def __init__(self) -> None:
"name": "AsyncInferenceClient.text_to_image",
"wrapper": huggingface_text_to_image_wrapper_async,
},
{
"module": "huggingface_hub.inference._client",
"name": "InferenceClient.image_to_image",
"wrapper": huggingface_image_to_image_wrapper,
},
{
"module": "huggingface_hub.inference._generated._async_client",
"name": "AsyncInferenceClient.image_to_image",
"wrapper": huggingface_image_to_image_wrapper_async,
},
]

def instrument(self) -> None:
Expand Down
44 changes: 29 additions & 15 deletions scope3ai/tracers/huggingface/speech_to_text.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import time
from dataclasses import dataclass, asdict
from typing import Any, Callable, Optional
from typing import Any, Callable, Optional, Union

from aiohttp import ClientResponse
from huggingface_hub import (
AutomaticSpeechRecognitionOutput as _AutomaticSpeechRecognitionOutput,
)
Expand All @@ -21,23 +23,25 @@ class AutomaticSpeechRecognitionOutput(_AutomaticSpeechRecognitionOutput):
scope3ai: Optional[Scope3AIContext] = None


def huggingface_automatic_recognition_output_wrapper_non_stream(
wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
def _hugging_face_automatic_recognition_wrapper(
timer_start: Any,
model: Any,
response: Any,
http_response: Union[ClientResponse, Response],
args: Any,
kwargs: Any,
) -> AutomaticSpeechRecognitionOutput:
http_response: Response | None = None
with requests_response_capture() as responses:
response = wrapped(*args, **kwargs)
http_responses = responses.get()
if len(http_responses) > 0:
http_response = http_responses[0]
compute_audio_length = http_response.headers.get("x-compute-audio-length")
compute_time = http_response.headers.get("x-compute-time")
model = kwargs.get("model") or instance.get_recommended_model("text-to-speech")
if http_response:
compute_audio_length = http_response.headers.get("x-compute-audio-length")
compute_time = http_response.headers.get("x-compute-time")
else:
compute_audio_length = 0
compute_time = time.perf_counter() - timer_start

scope3_row = ImpactRow(
model=Model(id=model),
task=Task.text_to_speech,
output_audio_seconds=int(float(compute_audio_length)),
input_audio_seconds=int(float(compute_audio_length)),
request_duration_ms=float(compute_time) * 1000,
managed_service_id=PROVIDER,
)
Expand All @@ -51,6 +55,16 @@ def huggingface_automatic_recognition_output_wrapper_non_stream(
def huggingface_automatic_recognition_output_wrapper(
wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> AutomaticSpeechRecognitionOutput:
return huggingface_automatic_recognition_output_wrapper_non_stream(
wrapped, instance, args, kwargs
timer_start = time.perf_counter()
http_response: Response | None = None
with requests_response_capture() as responses:
response = wrapped(*args, **kwargs)
http_responses = responses.get()
if len(http_responses) > 0:
http_response = http_responses[-1]
model = kwargs.get("model") or instance.get_recommended_model(
"automatic-speech-recognition"
)
return _hugging_face_automatic_recognition_wrapper(
timer_start, model, response, http_response, args, kwargs
)
85 changes: 36 additions & 49 deletions scope3ai/tracers/huggingface/text_to_image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import time
from dataclasses import dataclass
from typing import Any, Callable, Optional
from typing import Any, Callable, Optional, Union

import tiktoken
from aiohttp import ClientResponse
Expand All @@ -22,23 +23,24 @@ class TextToImageOutput(_TextToImageOutput):
scope3ai: Optional[Scope3AIContext] = None


def huggingface_text_to_image_wrapper_non_stream(
wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
def _hugging_face_text_to_image_wrapper(
timer_start: Any,
model: Any,
response: Any,
http_response: Union[ClientResponse, Response],
args: Any,
kwargs: Any,
) -> TextToImageOutput:
http_response: Response | None = None
with requests_response_capture() as responses:
response = wrapped(*args, **kwargs)
http_responses = responses.get()
if len(http_responses) > 0:
http_response = http_responses[-1]
model = kwargs.get("model") or instance.get_recommended_model("text-to-image")
encoder = tiktoken.get_encoding("cl100k_base")
if len(args) > 0:
prompt = args[0]
input_tokens = None
if http_response:
compute_time = http_response.headers.get("x-compute-time")
input_tokens = http_response.headers.get("x-compute-characters")
else:
prompt = kwargs["prompt"]
compute_time = http_response.headers.get("x-compute-time")
input_tokens = len(encoder.encode(prompt))
compute_time = time.perf_counter() - timer_start
if not input_tokens:
encoder = tiktoken.get_encoding("cl100k_base")
prompt = args[0] if len(args) > 0 else kwargs.get("prompt", "")
input_tokens = len(encoder.encode(prompt)) if prompt != "" else 0
width, height = response.size
scope3_row = ImpactRow(
model=Model(id=model),
Expand All @@ -55,48 +57,33 @@ def huggingface_text_to_image_wrapper_non_stream(
return result


async def huggingface_text_to_image_wrapper_async_non_stream(
wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
def huggingface_text_to_image_wrapper(
wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> TextToImageOutput:
http_response: ClientResponse | None = None
with aiohttp_response_capture() as responses:
response = await wrapped(*args, **kwargs)
timer_start = time.perf_counter()
http_response: Response | None = None
with requests_response_capture() as responses:
response = wrapped(*args, **kwargs)
http_responses = responses.get()
if len(http_responses) > 0:
http_response = http_responses[-1]
model = kwargs.get("model") or instance.get_recommended_model("text-to-image")
encoder = tiktoken.get_encoding("cl100k_base")
if len(args) > 0:
prompt = args[0]
else:
prompt = kwargs["prompt"]
compute_time = http_response.headers.get("x-compute-time")
input_tokens = len(encoder.encode(prompt))
width, height = response.size
scope3_row = ImpactRow(
model=Model(id=model),
input_tokens=input_tokens,
task=Task.text_to_image,
output_images=["{width}x{height}".format(width=width, height=height)],
request_duration_ms=float(compute_time) * 1000,
managed_service_id=PROVIDER,
return _hugging_face_text_to_image_wrapper(
timer_start, model, response, http_response, args, kwargs
)

scope3_ctx = Scope3AI.get_instance().submit_impact(scope3_row)
result = TextToImageOutput(response)
result.scope3ai = scope3_ctx
return result


def huggingface_text_to_image_wrapper(
wrapped: Callable, instance: InferenceClient, args: Any, kwargs: Any
) -> TextToImageOutput:
return huggingface_text_to_image_wrapper_non_stream(wrapped, instance, args, kwargs)


async def huggingface_text_to_image_wrapper_async(
wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
) -> TextToImageOutput:
return await huggingface_text_to_image_wrapper_async_non_stream(
wrapped, instance, args, kwargs
timer_start = time.perf_counter()
http_response: ClientResponse | None = None
with aiohttp_response_capture() as responses:
response = await wrapped(*args, **kwargs)
http_responses = responses.get()
if len(http_responses) > 0:
http_response = http_responses[-1]
model = kwargs.get("model") or instance.get_recommended_model("text-to-image")
return _hugging_face_text_to_image_wrapper(
timer_start, model, response, http_response, args, kwargs
)
Loading
Loading