.github/workflows/tests-integration.yaml (2 changes: 1 addition & 1 deletion)
@@ -154,7 +154,7 @@ jobs:
ollama serve &
ollama pull llama3.2:1b
ollama pull qwen3:0.6b
- ollama pull llava:7b
+ ollama pull qwen3-vl:2b

- name: Wait for Ollama to be ready
if: github.event.inputs.filter == '' || contains(github.event.inputs.filter, 'ollama')
src/any_llm/providers/minimax/minimax.py (9 changes: 9 additions & 0 deletions)
@@ -6,6 +6,7 @@
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk as OpenAIChatCompletionChunk

from any_llm.exceptions import UnsupportedParameterError
+ from any_llm.providers.minimax.utils import _convert_chat_completion
from any_llm.providers.openai.base import BaseOpenAIProvider
from any_llm.types.completion import ChatCompletion, ChatCompletionChunk, CompletionParams, Reasoning
from any_llm.utils.reasoning import process_streaming_reasoning_chunks
@@ -23,6 +24,14 @@ class MinimaxProvider(BaseOpenAIProvider):
SUPPORTS_LIST_MODELS = False
SUPPORTS_EMBEDDING = False

+ @staticmethod
+ def _convert_completion_response(response: Any) -> ChatCompletion:
+     if isinstance(response, OpenAIChatCompletion):
+         return _convert_chat_completion(response)
+     if isinstance(response, ChatCompletion):
+         return response
+     return ChatCompletion.model_validate(response)

def _convert_completion_response_async(
self, response: OpenAIChatCompletion | AsyncStream[OpenAIChatCompletionChunk]
) -> ChatCompletion | AsyncIterator[ChatCompletionChunk]:
src/any_llm/providers/minimax/utils.py (22 changes: 22 additions & 0 deletions, new file)
@@ -0,0 +1,22 @@
from openai.types.chat.chat_completion import ChatCompletion as OpenAIChatCompletion

from any_llm.providers.openai.utils import _normalize_openai_dict_response
from any_llm.types.completion import ChatCompletion
from any_llm.utils.reasoning import normalize_reasoning_from_provider_fields_and_xml_tags


def _convert_chat_completion(response: OpenAIChatCompletion) -> ChatCompletion:
response_dict = _normalize_openai_dict_response(response.model_dump())

choices = response_dict.get("choices")
if isinstance(choices, list):
for choice in choices:
message = choice.get("message") if isinstance(choice, dict) else None
if isinstance(message, dict):
normalize_reasoning_from_provider_fields_and_xml_tags(message)

delta = choice.get("delta") if isinstance(choice, dict) else None
if isinstance(delta, dict):
normalize_reasoning_from_provider_fields_and_xml_tags(delta)

return ChatCompletion.model_validate(response_dict)
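
Taken together, the MiniMax changes route non-streaming responses through _convert_chat_completion, which dumps the OpenAI-shaped response to a dict, normalizes any reasoning carried in provider-specific fields or XML-style tags (per the helper's name), and re-validates the result as an any_llm ChatCompletion. Below is a minimal sketch of how that path might be exercised; the payload is invented for illustration and is not real MiniMax output.

# Illustrative only: a hand-built OpenAI-shaped response standing in for MiniMax output.
from openai.types.chat.chat_completion import ChatCompletion as OpenAIChatCompletion

from any_llm.providers.minimax.utils import _convert_chat_completion

raw = OpenAIChatCompletion.model_validate(
    {
        "id": "chatcmpl-demo",
        "object": "chat.completion",
        "created": 1700000000,
        "model": "MiniMax-M2",
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                # Assumption: reasoning arrives inline in <think> tags or a provider field,
                # which normalize_reasoning_from_provider_fields_and_xml_tags is named to handle.
                "message": {"role": "assistant", "content": "<think>plan the reply</think>Hello!"},
            }
        ],
    }
)

completion = _convert_chat_completion(raw)  # expected: any_llm ChatCompletion with reasoning normalized
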
tests/conftest.py (16 changes: 9 additions & 7 deletions)
@@ -17,10 +17,10 @@ def provider_reasoning_model_map() -> dict[LLMProvider, str]:
LLMProvider.GROQ: "openai/gpt-oss-20b",
LLMProvider.FIREWORKS: "accounts/fireworks/models/gpt-oss-20b",
LLMProvider.OPENAI: "gpt-5-nano",
LLMProvider.MISTRAL: "magistral-small-latest",
LLMProvider.MISTRAL: "magistral-medium-latest",
LLMProvider.XAI: "grok-3-mini-latest",
LLMProvider.OLLAMA: "qwen3:0.6b",
LLMProvider.OPENROUTER: "deepseek/deepseek-v3.1-terminus",
LLMProvider.OPENROUTER: "google/gemini-2.5-flash-lite",
LLMProvider.LLAMAFILE: "N/A",
LLMProvider.LLAMACPP: "N/A",
LLMProvider.LMSTUDIO: "openai/gpt-oss-20b", # You must have LM Studio running and the server enabled
@@ -34,7 +34,7 @@ def provider_reasoning_model_map() -> dict[LLMProvider, str]:
LLMProvider.NEBIUS: "openai/gpt-oss-20b",
LLMProvider.SAMBANOVA: "DeepSeek-R1-Distill-Llama-70B",
LLMProvider.TOGETHER: "OpenAI/gpt-oss-20B",
LLMProvider.PORTKEY: "@anthropic/claude-3-7-sonnet-latest",
LLMProvider.PORTKEY: "@nebius-any-llm/Qwen/Qwen3-14B",
LLMProvider.MINIMAX: "MiniMax-M2",
LLMProvider.ZAI: "glm-4.5-flash",
}
@@ -68,12 +68,12 @@ def provider_model_map() -> dict[LLMProvider, str]:
LLMProvider.WATSONX: "ibm/granite-3-8b-instruct",
LLMProvider.FIREWORKS: "accounts/fireworks/models/llama4-scout-instruct-basic",
LLMProvider.GROQ: "openai/gpt-oss-20b",
LLMProvider.PORTKEY: "@any-llm-test/gpt-4.1-nano",
LLMProvider.PORTKEY: "@any-llm-test/gpt-4.1-mini",
LLMProvider.LLAMA: "Llama-4-Maverick-17B-128E-Instruct-FP8",
LLMProvider.AZURE: "openai/gpt-4.1-nano",
LLMProvider.AZUREOPENAI: "azure/<your_deployment_name>",
LLMProvider.PERPLEXITY: "sonar",
LLMProvider.OPENROUTER: "meta-llama/llama-3.3-8b-instruct:free",
LLMProvider.OPENROUTER: "google/gemini-2.5-flash-lite",
LLMProvider.LLAMACPP: "N/A",
LLMProvider.MINIMAX: "MiniMax-M2",
LLMProvider.ZAI: "glm-4-32b-0414-128k",
@@ -84,11 +84,12 @@
def provider_image_model_map(provider_model_map: dict[LLMProvider, str]) -> dict[LLMProvider, str]:
return {
**provider_model_map,
LLMProvider.OPENAI: "gpt-5-mini", # Slightly more powerful so that it doesn't get caught in a loop of logic
LLMProvider.WATSONX: "meta-llama/llama-guard-3-11b-vision",
LLMProvider.SAMBANOVA: "Llama-4-Maverick-17B-128E-Instruct",
LLMProvider.NEBIUS: "openai/gpt-oss-20b",
LLMProvider.OPENROUTER: "mistralai/mistral-small-3.2-24b-instruct:free",
LLMProvider.OLLAMA: "llava:7b",
LLMProvider.OPENROUTER: "google/gemini-2.5-flash-lite",
LLMProvider.OLLAMA: "qwen3-vl:2b",
}


@@ -127,6 +128,7 @@ def provider_client_config() -> dict[LLMProvider, dict[str, Any]]:
LLMProvider.COHERE: {"timeout": 10},
LLMProvider.GATEWAY: {"api_base": "http://127.0.0.1:3000", "timeout": 1},
LLMProvider.GROQ: {"timeout": 10},
LLMProvider.OPENAI: {"timeout": 100},
LLMProvider.HUGGINGFACE: {"api_base": "https://y0okp71n85ezo5nr.us-east-1.aws.endpoints.huggingface.cloud/v1/"},
LLMProvider.LLAMACPP: {"api_base": "http://127.0.0.1:8090/v1"},
LLMProvider.MISTRAL: {"timeout_ms": 100000},
tests/integration/test_list_models.py (5 changes: 3 additions & 2 deletions)
@@ -10,7 +10,8 @@
from tests.constants import EXPECTED_PROVIDERS, LOCAL_PROVIDERS


- def test_list_models(provider: LLMProvider, provider_client_config: dict[LLMProvider, dict[str, Any]]) -> None:
+ @pytest.mark.asyncio
+ async def test_list_models(provider: LLMProvider, provider_client_config: dict[LLMProvider, dict[str, Any]]) -> None:
"""Test that all supported providers can be loaded successfully."""
try:
config = provider_client_config.get(provider, {})
@@ -21,7 +22,7 @@ def test_list_models(provider: LLMProvider, provider_client_config: dict[LLMProv
if not llm.SUPPORTS_LIST_MODELS:
pytest.skip(f"{provider.value} does not support listing models, skipping")

- available_models = llm.list_models()
+ available_models = await llm.alist_models()
assert len(available_models) > 0
assert isinstance(available_models, list)
assert all(isinstance(model, Model) for model in available_models)
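
With this change the test exercises the async listing path. A minimal sketch of the same call outside pytest, assuming an llm instance built the same way the test builds it (that construction is elided in the collapsed part of the diff above):

import asyncio

async def check_models(llm) -> None:
    # Mirrors the updated test: list models via the async API and expect a non-empty list.
    models = await llm.alist_models()
    assert len(models) > 0

# asyncio.run(check_models(llm))  # with `llm` constructed as in the test's try block
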
tests/integration/test_reasoning.py (7 changes: 4 additions & 3 deletions)
@@ -83,7 +83,7 @@ async def test_completion_reasoning_streaming(
num_chunks = 0
results = await llm.acompletion(
model=model_id,
messages=[{"role": "user", "content": "Please say hello! Think very briefly before you respond."}],
messages=[{"role": "user", "content": "Please say hello! Think before you respond."}],
stream=True,
reasoning_effort="low"
if provider
@@ -97,6 +97,7 @@
LLMProvider.PORTKEY,
LLMProvider.SAMBANOVA,
LLMProvider.TOGETHER,
+ LLMProvider.PORTKEY,
)
else "auto",
max_tokens=4999
@@ -113,9 +114,9 @@
reasoning += result.choices[0].delta.reasoning.content or ""

assert num_chunks >= 1, f"Expected at least 1 chunk, got {num_chunks}"
- assert output.strip() != "", "Expected non-empty output content"
+ assert output != "", f"Expected non-empty output content, got {output}"

- assert reasoning.strip() != "", f"Expected non-empty reasoning content for {provider.value}"
+ assert reasoning != "", f"Expected non-empty reasoning content for {provider.value}, got {output}"
except MissingApiKeyError:
if provider in EXPECTED_PROVIDERS:
raise
tests/integration/test_tool.py (4 changes: 2 additions & 2 deletions)
@@ -29,7 +29,7 @@ def echo(message: str) -> str:

available_tools = {"echo": echo}

prompt = "Please call the `echo` tool with the argument `Hello, world!`"
prompt = "Please call the `echo` tool with the argument `Hello, world!`. You must use the tool, do not ask any follow up questions."
messages: list[dict[str, Any] | ChatCompletionMessage] = [{"role": "user", "content": prompt}]

try:
@@ -47,7 +47,7 @@ def echo(message: str) -> str:
messages.append(result.choices[0].message)

completion_tool_calls = result.choices[0].message.tool_calls
- assert completion_tool_calls is not None
+ assert completion_tool_calls is not None, f"No tool calls found in response: {result.choices[0].message}"
assert (
len(completion_tool_calls) > 0
) # if the llm wants to call more than one tool that's ok for the purpose of the test