Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ uv add scope3ai

## 📚 Library and SDK support Matrix

| Library/SDK | Text generation | TTS | STT | Image Generation | Translation |
|-------------|-----------------|----|-----|------------------|-----------|
| Anthropic | ✅ | | | | |
| Cohere | ✅ | | | | |
| OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ |
| Huggingface | ✅ | ✅ | ✅ | ✅ | ✅ |
| LiteLLM | ✅ | | | | |
| MistralAi | ✅ | | | | |
| Library/SDK | Text generation | TTS | STT | Image Generation | Translation | Multimodal input | Multimodal output |
|-------------|-----------------|-----|-----|------------------|-------------|------------------|-------------------|
| Anthropic   | ✅              |     |     |                  |             |                  |                   |
| Cohere      | ✅              |     |     |                  |             |                  |                   |
| OpenAI      | ✅              | ✅  | ✅  | ✅               | ✅          | Images/Audio     |                   |
| Huggingface | ✅              | ✅  | ✅  | ✅               | ✅          |                  |                   |
| LiteLLM     | ✅              |     |     |                  |             | Images/Audio     |                   |
| MistralAi   | ✅              |     |     |                  |             | Images           |                   |

Roadmap:
- Google
Expand Down
7 changes: 7 additions & 0 deletions scope3ai/tracers/mistralai/chat.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import time
from collections.abc import AsyncGenerator, Iterable
from typing import Any, Callable, Optional
Expand All @@ -11,9 +12,12 @@
from scope3ai.api.types import Scope3AIContext
from scope3ai.api.typesgen import ImpactRow
from scope3ai.constants import PROVIDERS
from scope3ai.tracers.utils.multimodal import aggregate_multimodal

PROVIDER = PROVIDERS.MISTRALAI.value

logger = logging.getLogger("scope3ai.tracers.mistralai.chat")


class ChatCompletionResponse(_ChatCompletionResponse):
scope3ai: Optional[Scope3AIContext] = None
Expand All @@ -40,6 +44,9 @@ def mistralai_v1_chat_wrapper(
managed_service_id=PROVIDER,
)
scope3ai_ctx = Scope3AI.get_instance().submit_impact(scope3_row)
messages = args[1] if len(args) > 1 else kwargs.get("messages")
for message in messages:
aggregate_multimodal(message, scope3_row, logger)
chat = ChatCompletionResponse(**response.model_dump())
chat.scope3ai = scope3ai_ctx
return chat
Expand Down
107 changes: 107 additions & 0 deletions tests/cassettes/test_litellm_multimodal_vision_2_images_mistralai.yaml

Large diffs are not rendered by default.

107 changes: 107 additions & 0 deletions tests/cassettes/test_litellm_multimodal_vision_mistralai.yaml

Large diffs are not rendered by default.

89 changes: 89 additions & 0 deletions tests/cassettes/test_mistralai_multimodal_vision.yaml

Large diffs are not rendered by default.

88 changes: 88 additions & 0 deletions tests/cassettes/test_mistralai_multimodal_vision_2_images.yaml

Large diffs are not rendered by default.

124 changes: 96 additions & 28 deletions tests/test_litellm_multimodal.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,19 @@
import base64
from pathlib import Path
import pytest
import litellm

from scope3ai.api.typesgen import Image

TEST_IMAGE_PNG = Path(__file__).parent / "data" / "image_1024.png"
TEST_IMAGE_JPG = Path(__file__).parent / "data" / "image_512.jpg"
TEST_AUDIO_MP3 = Path(__file__).parent / "data" / "hello_there.mp3"
TEST_AUDIO_WAV = Path(__file__).parent / "data" / "hello_there.wav"


def file_as_b64str(path: Path) -> str:
data = path.read_bytes()
return base64.b64encode(data).decode("utf-8")


def load_image_b64(path: Path) -> str:
media_types = {
".png": "image/png",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".webp": "image/webp",
}
b64 = file_as_b64str(path)
media_type = media_types[path.suffix]
return f"data:{media_type};base64,{b64}"
from tests.utils import (
load_image_b64,
TEST_IMAGE_PNG,
file_as_b64str,
TEST_AUDIO_MP3,
TEST_IMAGE_JPG,
TEST_AUDIO_WAV,
)


@pytest.mark.vcr
def test_litellm_multimodal_vision(tracer_with_sync_init):
def test_litellm_multimodal_vision_openai(tracer_with_sync_init):
response = litellm.completion(
model="gpt-4o-mini",
messages=[
Expand Down Expand Up @@ -66,7 +50,7 @@ def test_litellm_multimodal_vision(tracer_with_sync_init):


@pytest.mark.vcr
def test_litellm_multimodal_vision_2_images(tracer_with_sync_init):
def test_litellm_multimodal_vision_2_images_openai(tracer_with_sync_init):
from scope3ai.api.typesgen import Image

response = litellm.completion(
Expand Down Expand Up @@ -113,7 +97,7 @@ def test_litellm_multimodal_vision_2_images(tracer_with_sync_init):


@pytest.mark.vcr
def test_litellm_multimodal_audio(tracer_with_sync_init):
def test_litellm_multimodal_audio_openai(tracer_with_sync_init):
response = litellm.completion(
model="gpt-4o-audio-preview",
messages=[
Expand Down Expand Up @@ -150,7 +134,7 @@ def test_litellm_multimodal_audio(tracer_with_sync_init):


@pytest.mark.vcr
def test_litellm_multimodal_audio_2(tracer_with_sync_init):
def test_litellm_multimodal_audio_2_openai(tracer_with_sync_init):
response = litellm.completion(
model="gpt-4o-audio-preview",
messages=[
Expand Down Expand Up @@ -191,3 +175,87 @@ def test_litellm_multimodal_audio_2(tracer_with_sync_init):
assert response.scope3ai.impact.total_impact.usage_water_ml > 0
assert response.scope3ai.impact.total_impact.embodied_emissions_gco2e > 0
assert response.scope3ai.impact.total_impact.embodied_water_ml > 0


@pytest.mark.vcr
def test_litellm_multimodal_vision_mistralai(tracer_with_sync_init):
    """Send text plus one 1024px PNG to Mistral's pixtral via litellm and
    verify the tracer records token counts, the input image size, and
    strictly positive impact metrics."""
    user_content = [
        {
            "type": "text",
            "text": "Hello World! What's the image about ?",
        },
        {
            "type": "image_url",
            "image_url": {"url": load_image_b64(TEST_IMAGE_PNG)},
        },
    ]
    response = litellm.completion(
        model="mistral/pixtral-12b-2409",
        messages=[{"role": "user", "content": user_content}],
        api_version="2024-02-15-preview",
    )
    assert len(response.choices) > 0
    scope3ai = getattr(response, "scope3ai")
    assert scope3ai is not None
    assert scope3ai.request.input_tokens == 4172
    assert scope3ai.request.output_tokens == 4253
    assert scope3ai.request.input_images == [Image(root="1024x1024")]
    assert scope3ai.impact is not None
    total = scope3ai.impact.total_impact
    assert total is not None
    assert total.usage_energy_wh > 0
    assert total.usage_emissions_gco2e > 0
    assert total.usage_water_ml > 0
    assert total.embodied_emissions_gco2e > 0
    assert total.embodied_water_ml > 0


@pytest.mark.vcr
def test_litellm_multimodal_vision_2_images_mistralai(tracer_with_sync_init):
    """Send text plus two images (512px JPG, 1024px PNG) to Mistral's pixtral
    via litellm and verify per-image size accounting, token counts, and
    strictly positive impact metrics.

    Uses the module-level ``Image`` import (consistent with the single-image
    mistralai test) instead of re-importing it locally.
    """
    response = litellm.completion(
        model="mistral/pixtral-12b-2409",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Hello World! What's the image about ?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": load_image_b64(TEST_IMAGE_JPG),
                        },
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": load_image_b64(TEST_IMAGE_PNG),
                        },
                    },
                ],
            },
        ],
    )
    assert len(response.choices) > 0
    assert getattr(response, "scope3ai") is not None
    assert response.scope3ai.request.input_tokens == 5228
    assert response.scope3ai.request.output_tokens == 5337
    # Images are reported in request order: JPG (512px) first, PNG (1024px) second.
    assert response.scope3ai.request.input_images == [
        Image(root="512x512"),
        Image(root="1024x1024"),
    ]
    assert response.scope3ai.impact is not None
    assert response.scope3ai.impact.total_impact is not None
    assert response.scope3ai.impact.total_impact.usage_energy_wh > 0
    assert response.scope3ai.impact.total_impact.usage_emissions_gco2e > 0
    assert response.scope3ai.impact.total_impact.usage_water_ml > 0
    assert response.scope3ai.impact.total_impact.embodied_emissions_gco2e > 0
    assert response.scope3ai.impact.total_impact.embodied_water_ml > 0
87 changes: 87 additions & 0 deletions tests/test_mistralai_multimodal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import pytest
from mistralai import Mistral

from scope3ai.api.typesgen import Image
from tests.utils import load_image_b64, TEST_IMAGE_PNG, TEST_IMAGE_JPG


@pytest.mark.vcr
def test_mistralai_multimodal_vision(tracer_with_sync_init):
    """Send text plus one 1024px PNG through the Mistral SDK and verify
    token counts, the recorded image size, and positive impact metrics."""
    content_parts = [
        {"type": "text", "text": "Hello World! What's the image about ?"},
        {
            "type": "image_url",
            "image_url": {"url": load_image_b64(TEST_IMAGE_PNG)},
        },
    ]
    client = Mistral()
    response = client.chat.complete(
        model="pixtral-12b-2409",
        messages=[{"role": "user", "content": content_parts}],
    )
    assert len(response.choices) > 0
    request = response.scope3ai.request
    assert request.input_tokens == 4172
    assert request.output_tokens == 105
    assert request.input_images == [Image(root="1024x1024")]
    assert response.scope3ai.impact is not None
    total = response.scope3ai.impact.total_impact
    assert total is not None
    assert total.usage_energy_wh > 0
    assert total.usage_emissions_gco2e > 0
    assert total.usage_water_ml > 0
    assert total.embodied_emissions_gco2e > 0
    assert total.embodied_water_ml > 0


@pytest.mark.vcr
def test_mistralai_multimodal_vision_2_images(tracer_with_sync_init):
    """Send text plus a 512px JPG and a 1024px PNG through the Mistral SDK;
    verify per-image size accounting and positive impact metrics."""
    content_parts = [
        {"type": "text", "text": "Hello World! What's the image about ?"},
        {
            "type": "image_url",
            "image_url": {"url": load_image_b64(TEST_IMAGE_JPG)},
        },
        {
            "type": "image_url",
            "image_url": {"url": load_image_b64(TEST_IMAGE_PNG)},
        },
    ]
    client = Mistral()
    response = client.chat.complete(
        model="pixtral-12b-2409",
        messages=[{"role": "user", "content": content_parts}],
    )
    assert len(response.choices) > 0
    scope3ai = getattr(response, "scope3ai")
    assert scope3ai is not None
    assert scope3ai.request.input_tokens == 5228
    assert scope3ai.request.output_tokens == 108
    assert scope3ai.request.input_images == [
        Image(root="512x512"),
        Image(root="1024x1024"),
    ]
    assert scope3ai.impact is not None
    total = scope3ai.impact.total_impact
    assert total is not None
    assert total.usage_energy_wh > 0
    assert total.usage_emissions_gco2e > 0
    assert total.usage_water_ml > 0
    assert total.embodied_emissions_gco2e > 0
    assert total.embodied_water_ml > 0
32 changes: 8 additions & 24 deletions tests/test_openai_multimodal.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,13 @@
import base64
from pathlib import Path

import pytest

TEST_IMAGE_PNG = Path(__file__).parent / "data" / "image_1024.png"
TEST_IMAGE_JPG = Path(__file__).parent / "data" / "image_512.jpg"
TEST_AUDIO_MP3 = Path(__file__).parent / "data" / "hello_there.mp3"
TEST_AUDIO_WAV = Path(__file__).parent / "data" / "hello_there.wav"


def file_as_b64str(path: Path) -> str:
data = path.read_bytes()
return base64.b64encode(data).decode("utf-8")


def load_image_b64(path: Path) -> str:
media_types = {
".png": "image/png",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".webp": "image/webp",
}
b64 = file_as_b64str(path)
media_type = media_types[path.suffix]
return f"data:{media_type};base64,{b64}"
from tests.utils import (
TEST_IMAGE_PNG,
load_image_b64,
TEST_IMAGE_JPG,
TEST_AUDIO_MP3,
file_as_b64str,
TEST_AUDIO_WAV,
)


@pytest.mark.vcr
Expand Down
24 changes: 24 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import base64
from pathlib import Path

TEST_IMAGE_PNG = Path(__file__).parent / "data" / "image_1024.png"
TEST_IMAGE_JPG = Path(__file__).parent / "data" / "image_512.jpg"
TEST_AUDIO_MP3 = Path(__file__).parent / "data" / "hello_there.mp3"
TEST_AUDIO_WAV = Path(__file__).parent / "data" / "hello_there.wav"


def file_as_b64str(path: Path) -> str:
    """Read *path* as raw bytes and return them base64-encoded as ASCII text."""
    return base64.b64encode(path.read_bytes()).decode("utf-8")


def load_image_b64(path: Path) -> str:
    """Encode an image file as a ``data:`` URL with a base64 payload.

    The media type is derived from the file extension, matched
    case-insensitively (so ``.PNG`` works as well as ``.png``).

    Args:
        path: Path to a ``.png``, ``.jpg``, ``.jpeg``, or ``.webp`` file.

    Returns:
        A ``data:<media-type>;base64,<payload>`` string.

    Raises:
        ValueError: If the file extension is not a supported image type
            (previously this surfaced as an opaque ``KeyError``).
    """
    media_types = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
    }
    suffix = path.suffix.lower()
    media_type = media_types.get(suffix)
    if media_type is None:
        raise ValueError(
            f"Unsupported image extension {suffix!r} for {path}; "
            f"expected one of {sorted(media_types)}"
        )
    b64 = base64.b64encode(path.read_bytes()).decode("utf-8")
    return f"data:{media_type};base64,{b64}"
Loading