.github/workflows/tests-integration.yaml (2 changes: 1 addition & 1 deletion)
@@ -154,7 +154,7 @@ jobs:
ollama serve &
ollama pull llama3.2:1b
ollama pull qwen3:0.6b
- ollama pull llava:7b
+ ollama pull qwen3-vl:2b

- name: Wait for Ollama to be ready
if: github.event.inputs.filter == '' || contains(github.event.inputs.filter, 'ollama')
src/any_llm/providers/minimax/minimax.py (9 changes: 9 additions & 0 deletions)
@@ -6,6 +6,7 @@
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk as OpenAIChatCompletionChunk

from any_llm.exceptions import UnsupportedParameterError
+ from any_llm.providers.minimax.utils import _convert_chat_completion
from any_llm.providers.openai.base import BaseOpenAIProvider
from any_llm.types.completion import ChatCompletion, ChatCompletionChunk, CompletionParams, Reasoning
from any_llm.utils.reasoning import process_streaming_reasoning_chunks
@@ -23,6 +24,14 @@ class MinimaxProvider(BaseOpenAIProvider):
SUPPORTS_LIST_MODELS = False
SUPPORTS_EMBEDDING = False

+ @staticmethod
+ def _convert_completion_response(response: Any) -> ChatCompletion:
+     if isinstance(response, OpenAIChatCompletion):
+         return _convert_chat_completion(response)
+     if isinstance(response, ChatCompletion):
+         return response
+     return ChatCompletion.model_validate(response)

def _convert_completion_response_async(
self, response: OpenAIChatCompletion | AsyncStream[OpenAIChatCompletionChunk]
) -> ChatCompletion | AsyncIterator[ChatCompletionChunk]:
src/any_llm/providers/minimax/utils.py (22 changes: 22 additions & 0 deletions, new file)
@@ -0,0 +1,22 @@
from openai.types.chat.chat_completion import ChatCompletion as OpenAIChatCompletion

from any_llm.providers.openai.utils import _normalize_openai_dict_response
from any_llm.types.completion import ChatCompletion
from any_llm.utils.reasoning import normalize_reasoning_from_provider_fields_and_xml_tags


def _convert_chat_completion(response: OpenAIChatCompletion) -> ChatCompletion:
response_dict = _normalize_openai_dict_response(response.model_dump())

choices = response_dict.get("choices")
if isinstance(choices, list):
for choice in choices:
message = choice.get("message") if isinstance(choice, dict) else None
if isinstance(message, dict):
normalize_reasoning_from_provider_fields_and_xml_tags(message)

delta = choice.get("delta") if isinstance(choice, dict) else None
if isinstance(delta, dict):
normalize_reasoning_from_provider_fields_and_xml_tags(delta)

return ChatCompletion.model_validate(response_dict)
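
Taken together, the MiniMax changes route non-streaming responses through _convert_chat_completion, which dumps the OpenAI-shaped response to a dict, normalizes any reasoning carried in provider-specific fields or XML-style tags (per the helper's name), and re-validates the result as an any_llm ChatCompletion. Below is a minimal sketch of how that path might be exercised; the payload is invented for illustration and is not real MiniMax output.

# Illustrative only: a hand-built OpenAI-shaped response standing in for MiniMax output.
from openai.types.chat.chat_completion import ChatCompletion as OpenAIChatCompletion

from any_llm.providers.minimax.utils import _convert_chat_completion

raw = OpenAIChatCompletion.model_validate(
    {
        "id": "chatcmpl-demo",
        "object": "chat.completion",
        "created": 1700000000,
        "model": "MiniMax-M2",
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                # Assumption: reasoning arrives inline in <think> tags or a provider field,
                # which normalize_reasoning_from_provider_fields_and_xml_tags is named to handle.
                "message": {"role": "assistant", "content": "<think>plan the reply</think>Hello!"},
            }
        ],
    }
)

completion = _convert_chat_completion(raw)  # expected: any_llm ChatCompletion with reasoning normalized
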
tests/conftest.py (16 changes: 9 additions & 7 deletions)
@@ -17,10 +17,10 @@ def provider_reasoning_model_map() -> dict[LLMProvider, str]:
LLMProvider.GROQ: "openai/gpt-oss-20b",
LLMProvider.FIREWORKS: "accounts/fireworks/models/gpt-oss-20b",
LLMProvider.OPENAI: "gpt-5-nano",
LLMProvider.MISTRAL: "magistral-small-latest",
LLMProvider.MISTRAL: "magistral-medium-latest",
LLMProvider.XAI: "grok-3-mini-latest",
LLMProvider.OLLAMA: "qwen3:0.6b",
LLMProvider.OPENROUTER: "deepseek/deepseek-v3.1-terminus",
LLMProvider.OPENROUTER: "google/gemini-2.5-flash-lite",
LLMProvider.LLAMAFILE: "N/A",
LLMProvider.LLAMACPP: "N/A",
LLMProvider.LMSTUDIO: "openai/gpt-oss-20b", # You must have LM Studio running and the server enabled
@@ -34,7 +34,7 @@ def provider_reasoning_model_map() -> dict[LLMProvider, str]:
LLMProvider.NEBIUS: "openai/gpt-oss-20b",
LLMProvider.SAMBANOVA: "DeepSeek-R1-Distill-Llama-70B",
LLMProvider.TOGETHER: "OpenAI/gpt-oss-20B",
LLMProvider.PORTKEY: "@anthropic/claude-3-7-sonnet-latest",
LLMProvider.PORTKEY: "@nebius-any-llm/Qwen/Qwen3-14B",
LLMProvider.MINIMAX: "MiniMax-M2",
LLMProvider.ZAI: "glm-4.5-flash",
}
@@ -68,12 +68,12 @@ def provider_model_map() -> dict[LLMProvider, str]:
LLMProvider.WATSONX: "ibm/granite-3-8b-instruct",
LLMProvider.FIREWORKS: "accounts/fireworks/models/llama4-scout-instruct-basic",
LLMProvider.GROQ: "openai/gpt-oss-20b",
LLMProvider.PORTKEY: "@any-llm-test/gpt-4.1-nano",
LLMProvider.PORTKEY: "@any-llm-test/gpt-4.1-mini",
LLMProvider.LLAMA: "Llama-4-Maverick-17B-128E-Instruct-FP8",
LLMProvider.AZURE: "openai/gpt-4.1-nano",
LLMProvider.AZUREOPENAI: "azure/<your_deployment_name>",
LLMProvider.PERPLEXITY: "sonar",
LLMProvider.OPENROUTER: "meta-llama/llama-3.3-8b-instruct:free",
LLMProvider.OPENROUTER: "google/gemini-2.5-flash-lite",
LLMProvider.LLAMACPP: "N/A",
LLMProvider.MINIMAX: "MiniMax-M2",
LLMProvider.ZAI: "glm-4-32b-0414-128k",
@@ -84,11 +84,12 @@
def provider_image_model_map(provider_model_map: dict[LLMProvider, str]) -> dict[LLMProvider, str]:
return {
**provider_model_map,
LLMProvider.OPENAI: "gpt-5-mini", # Slightly more powerful so that it doesn't get caught in a loop of logic
LLMProvider.WATSONX: "meta-llama/llama-guard-3-11b-vision",
LLMProvider.SAMBANOVA: "Llama-4-Maverick-17B-128E-Instruct",
LLMProvider.NEBIUS: "openai/gpt-oss-20b",
LLMProvider.OPENROUTER: "mistralai/mistral-small-3.2-24b-instruct:free",
LLMProvider.OLLAMA: "llava:7b",
LLMProvider.OPENROUTER: "google/gemini-2.5-flash-lite",
LLMProvider.OLLAMA: "qwen3-vl:2b",
}


@@ -127,6 +128,7 @@ def provider_client_config() -> dict[LLMProvider, dict[str, Any]]:
LLMProvider.COHERE: {"timeout": 10},
LLMProvider.GATEWAY: {"api_base": "http://127.0.0.1:3000", "timeout": 1},
LLMProvider.GROQ: {"timeout": 10},
LLMProvider.OPENAI: {"timeout": 100},
LLMProvider.HUGGINGFACE: {"api_base": "https://y0okp71n85ezo5nr.us-east-1.aws.endpoints.huggingface.cloud/v1/"},
LLMProvider.LLAMACPP: {"api_base": "http://127.0.0.1:8090/v1"},
LLMProvider.MISTRAL: {"timeout_ms": 100000},
tests/integration/test_list_models.py (5 changes: 3 additions & 2 deletions)
@@ -10,7 +10,8 @@
from tests.constants import EXPECTED_PROVIDERS, LOCAL_PROVIDERS


- def test_list_models(provider: LLMProvider, provider_client_config: dict[LLMProvider, dict[str, Any]]) -> None:
+ @pytest.mark.asyncio
+ async def test_list_models(provider: LLMProvider, provider_client_config: dict[LLMProvider, dict[str, Any]]) -> None:
"""Test that all supported providers can be loaded successfully."""
try:
config = provider_client_config.get(provider, {})
@@ -21,7 +22,7 @@ def test_list_models(provider: LLMProvider, provider_client_config: dict[LLMProv
if not llm.SUPPORTS_LIST_MODELS:
pytest.skip(f"{provider.value} does not support listing models, skipping")

- available_models = llm.list_models()
+ available_models = await llm.alist_models()
assert len(available_models) > 0
assert isinstance(available_models, list)
assert all(isinstance(model, Model) for model in available_models)
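
With this change the test exercises the async listing path. A minimal sketch of the same call outside pytest, assuming an llm instance built the same way the test builds it (that construction is elided in the collapsed part of the diff above):

import asyncio

async def check_models(llm) -> None:
    # Mirrors the updated test: list models via the async API and expect a non-empty list.
    models = await llm.alist_models()
    assert len(models) > 0

# asyncio.run(check_models(llm))  # with `llm` constructed as in the test's try block
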
tests/integration/test_reasoning.py (7 changes: 4 additions & 3 deletions)
@@ -83,7 +83,7 @@ async def test_completion_reasoning_streaming(
num_chunks = 0
results = await llm.acompletion(
model=model_id,
messages=[{"role": "user", "content": "Please say hello! Think very briefly before you respond."}],
messages=[{"role": "user", "content": "Please say hello! Think before you respond."}],
stream=True,
reasoning_effort="low"
if provider
@@ -97,6 +97,7 @@
LLMProvider.PORTKEY,
LLMProvider.SAMBANOVA,
LLMProvider.TOGETHER,
+ LLMProvider.PORTKEY,
)
else "auto",
max_tokens=4999
@@ -113,9 +114,9 @@
reasoning += result.choices[0].delta.reasoning.content or ""

assert num_chunks >= 1, f"Expected at least 1 chunk, got {num_chunks}"
- assert output.strip() != "", "Expected non-empty output content"
+ assert output != "", f"Expected non-empty output content, got {output}"

- assert reasoning.strip() != "", f"Expected non-empty reasoning content for {provider.value}"
+ assert reasoning != "", f"Expected non-empty reasoning content for {provider.value}, got {output}"
except MissingApiKeyError:
if provider in EXPECTED_PROVIDERS:
raise
tests/integration/test_tool.py (4 changes: 2 additions & 2 deletions)
@@ -29,7 +29,7 @@ def echo(message: str) -> str:

available_tools = {"echo": echo}

prompt = "Please call the `echo` tool with the argument `Hello, world!`"
prompt = "Please call the `echo` tool with the argument `Hello, world!`. You must use the tool, do not ask any follow up questions."
messages: list[dict[str, Any] | ChatCompletionMessage] = [{"role": "user", "content": prompt}]

try:
@@ -47,7 +47,7 @@ def echo(message: str) -> str:
messages.append(result.choices[0].message)

completion_tool_calls = result.choices[0].message.tool_calls
- assert completion_tool_calls is not None
+ assert completion_tool_calls is not None, f"No tool calls found in response: {result.choices[0].message}"
assert (
len(completion_tool_calls) > 0
) # if the llm wants to call more than one tool that's ok for the purpose of the test