Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions scope3ai/tracers/huggingface/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)
from scope3ai.tracers.huggingface.speech_to_text import (
huggingface_automatic_recognition_output_wrapper,
huggingface_automatic_recognition_output_wrapper_async,
)
from scope3ai.tracers.huggingface.text_to_image import (
huggingface_text_to_image_wrapper,
Expand Down Expand Up @@ -75,6 +76,11 @@ def __init__(self) -> None:
"name": "InferenceClient.automatic_speech_recognition",
"wrapper": huggingface_automatic_recognition_output_wrapper,
},
{
"module": "huggingface_hub.inference._generated._async_client",
"name": "AsyncInferenceClient.automatic_speech_recognition",
"wrapper": huggingface_automatic_recognition_output_wrapper_async,
},
{
"module": "huggingface_hub.inference._generated._async_client",
"name": "AsyncInferenceClient.chat_completion",
Expand Down
25 changes: 23 additions & 2 deletions scope3ai/tracers/huggingface/speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from aiohttp import ClientResponse
from huggingface_hub import (
AutomaticSpeechRecognitionOutput as _AutomaticSpeechRecognitionOutput,
AsyncInferenceClient,
)
from huggingface_hub import InferenceClient # type: ignore[import-untyped]
from requests import Response
Expand All @@ -13,6 +14,7 @@
from scope3ai.api.typesgen import Task
from scope3ai.constants import PROVIDERS
from scope3ai.lib import Scope3AI
from scope3ai.response_interceptor.aiohttp_interceptor import aiohttp_response_capture
from scope3ai.response_interceptor.requests_interceptor import requests_response_capture

PROVIDER = PROVIDERS.HUGGINGFACE_HUB.value
Expand All @@ -32,13 +34,14 @@ def _hugging_face_automatic_recognition_wrapper(
args: Any,
kwargs: Any,
) -> AutomaticSpeechRecognitionOutput:
compute_audio_length = None
if http_response:
compute_audio_length = http_response.headers.get("x-compute-audio-length")
compute_time = http_response.headers.get("x-compute-time")
else:
compute_audio_length = 0
compute_time = time.perf_counter() - timer_start

if not compute_audio_length:
compute_audio_length = 0
scope3_row = ImpactRow(
model=Model(id=model),
task=Task.text_to_speech,
Expand Down Expand Up @@ -69,3 +72,21 @@ def huggingface_automatic_recognition_output_wrapper(
return _hugging_face_automatic_recognition_wrapper(
timer_start, model, response, http_response, args, kwargs
)


async def huggingface_automatic_recognition_output_wrapper_async(
    wrapped: Callable, instance: AsyncInferenceClient, args: Any, kwargs: Any
) -> AutomaticSpeechRecognitionOutput:
    """Async tracing wrapper for ``AsyncInferenceClient.automatic_speech_recognition``.

    Times the wrapped call, captures the underlying aiohttp HTTP response via
    the interceptor (so provider headers such as compute time can be read),
    and delegates the shared impact-row bookkeeping to
    ``_hugging_face_automatic_recognition_wrapper``.

    Args:
        wrapped: The original async method being instrumented.
        instance: The ``AsyncInferenceClient`` the method is bound to.
        args: Positional arguments forwarded to ``wrapped``.
        kwargs: Keyword arguments forwarded to ``wrapped``.

    Returns:
        The result produced by the shared sync wrapper (the recognition
        output, augmented with Scope3 impact data).
    """
    timer_start = time.perf_counter()
    # Will stay None when the interceptor saw no aiohttp traffic; the shared
    # wrapper then falls back to wall-clock timing instead of headers.
    http_response: ClientResponse | None = None
    with aiohttp_response_capture() as responses:
        response = await wrapped(*args, **kwargs)
        http_responses = responses.get()
        if len(http_responses) > 0:
            # Several responses may be captured; take the most recent one —
            # presumably the request issued by the call above (TODO confirm
            # against the interceptor's semantics).
            http_response = http_responses[-1]
    # Fall back to the task's recommended model when the caller did not pass
    # model= explicitly (mirrors the sync wrapper's behavior).
    model = kwargs.get("model") or instance.get_recommended_model(
        HUGGING_FACE_SPEECH_TO_TEXT_TASK
    )
    return _hugging_face_automatic_recognition_wrapper(
        timer_start, model, response, http_response, args, kwargs
    )
Loading
Loading