Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/modelgauge/sut_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from modelgauge.sut import SUT
from modelgauge.sut_definition import SUTDefinition
from modelgauge.sut_registry import SUTS
from modelgauge.suts.anthropic_sut_factory import AnthropicSUTFactory
from modelgauge.suts.google_sut_factory import GoogleSUTFactory
from modelgauge.suts.huggingface_sut_factory import HuggingFaceSUTFactory
from modelgauge.suts.indirect_sut import IndirectSUTFactory
from modelgauge.suts.modelship_sut import ModelShipSUTFactory
Expand All @@ -28,12 +30,14 @@ class SUTType(Enum):
# Maps a driver name to the factory class used to create a dynamic SUT
# for that driver. Each key appears exactly once: a repeated key in a dict
# literal is silently collapsed to its last value.
DYNAMIC_SUT_FACTORIES: dict = {
    "anthropic": AnthropicSUTFactory,
    "google": GoogleSUTFactory,
    "hf": HuggingFaceSUTFactory,
    "hfrelay": HuggingFaceSUTFactory,
    "indirect": IndirectSUTFactory,
    "openai": OpenAICompatibleSUTFactory,
    "modelship": ModelShipSUTFactory,
    "together": TogetherSUTFactory,
}

LEGACY_SUT_MODULE_MAP = {
Expand Down
55 changes: 55 additions & 0 deletions src/modelgauge/suts/anthropic_sut_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import difflib
import re
from collections import defaultdict

from anthropic import Anthropic

from modelgauge.dynamic_sut_factory import DynamicSUTFactory, ModelNotSupportedError
from modelgauge.secret_values import RawSecrets, InjectSecret
from modelgauge.sut import SUT
from modelgauge.sut_definition import SUTDefinition
from modelgauge.suts.anthropic_api import AnthropicApiKey, AnthropicSUT


class AnthropicSUTFactory(DynamicSUTFactory):
    """Dynamic SUT factory that builds ``AnthropicSUT`` instances.

    The requested model is validated against the ids returned by the
    Anthropic client's model listing. A name given without its trailing
    eight-digit suffix (e.g. ``claude-opus-4-1`` for
    ``claude-opus-4-1-20250805``) is expanded to the full id when exactly
    one listed model matches it.
    """

    # Matches a trailing "-" followed by exactly eight digits on a model id.
    _DATE_SUFFIX = re.compile(r"-\d{8}$")

    def get_secrets(self) -> list[InjectSecret]:
        """Return the secrets this factory needs: the Anthropic API key."""
        api_key = InjectSecret(AnthropicApiKey)
        return [api_key]

    def __init__(self, raw_secrets: RawSecrets):
        super().__init__(raw_secrets)
        self._client = None  # Lazy load.

    def client(self) -> Anthropic:
        """Return the Anthropic client, creating it on first use."""
        if self._client is None:
            self._client = Anthropic(api_key=self._secret().value)
        return self._client

    def _secret(self) -> AnthropicApiKey:
        """Return the injected API key (the only secret from get_secrets)."""
        return self.injected_secrets()[0]

    @staticmethod
    def _resolve_model_name(requested_model: str, model_names: list[str]) -> str:
        """Map a requested model name onto one of the listed model ids.

        Args:
            requested_model: Model name taken from the SUT definition.
            model_names: Model ids reported by the Anthropic API.

        Returns:
            ``requested_model`` itself when it is listed verbatim, otherwise
            the single listed id that equals it once the date suffix is
            stripped.

        Raises:
            ModelNotSupportedError: If no listed id matches, or if more than
                one dated id matches the dateless name.
        """
        if requested_model in model_names:
            return requested_model

        # Group the full ids by their name with the date suffix removed.
        dateless_names = defaultdict(list)
        for name in model_names:
            key = AnthropicSUTFactory._DATE_SUFFIX.sub("", name)
            dateless_names[key].append(name)

        # .get() so a miss does not insert an empty entry into the defaultdict.
        candidates = dateless_names.get(requested_model)
        if not candidates:
            # Nothing matches at all: report it as "not found" rather than
            # "not specific enough", which only applies to ambiguous names.
            raise ModelNotSupportedError(
                f"{requested_model} not found in Anthropic models. Closest options are {difflib.get_close_matches(requested_model, model_names, cutoff=0.1)}"
            )

        if len(candidates) > 1:
            raise ModelNotSupportedError(
                f"{requested_model} not specific enough. Available options are {candidates}"
            )

        return candidates[0]

    def make_sut(self, sut_definition: SUTDefinition) -> SUT:
        """Create an ``AnthropicSUT`` for the model named in ``sut_definition``.

        When the definition names a model without its date suffix, both the
        model name and the SUT uid are rewritten to use the full listed id.

        Raises:
            ModelNotSupportedError: If the requested model cannot be resolved
                to exactly one listed model id.
        """
        model_names = [m.id for m in self.client().models.list()]
        uid = sut_definition.dynamic_uid
        requested_model = sut_definition.to_dynamic_sut_metadata().model

        resolved_model = self._resolve_model_name(requested_model, model_names)
        if resolved_model != requested_model:
            # Keep the uid in sync with the fully-qualified model id.
            uid = uid.replace(requested_model, resolved_model)

        return AnthropicSUT(uid, resolved_model, self._secret())
42 changes: 42 additions & 0 deletions src/modelgauge/suts/google_sut_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import difflib

from google import genai

from modelgauge.dynamic_sut_factory import DynamicSUTFactory, ModelNotSupportedError
from modelgauge.secret_values import RawSecrets, InjectSecret
from modelgauge.sut import SUT
from modelgauge.sut_definition import SUTDefinition
from modelgauge.suts.google_genai import GoogleGenAiSUT
from modelgauge.suts.google_generativeai import GoogleAiApiKey

# Driver identifier for the Google factory.
# NOTE(review): presumably this should stay in sync with the "google" key
# under which the factory is registered in DYNAMIC_SUT_FACTORIES - confirm.
DRIVER_NAME = "google"


class GoogleSUTFactory(DynamicSUTFactory):
    """Dynamic SUT factory that builds ``GoogleGenAiSUT`` instances.

    A requested model is accepted only if it appears in the model listing
    returned by the genai client (with the ``models/`` text removed).
    """

    def get_secrets(self) -> list[InjectSecret]:
        """Return the secrets this factory needs: the Google AI API key."""
        return [InjectSecret(GoogleAiApiKey)]

    def __init__(self, raw_secrets: RawSecrets):
        super().__init__(raw_secrets)
        # Built on the first call to gemini_client().
        self._gemini_client = None

    def gemini_client(self) -> genai.Client:
        """Return the genai client, constructing and caching it on first use."""
        cached = self._gemini_client
        if cached is None:
            cached = genai.Client(api_key=self._gemini_secret().value)
            self._gemini_client = cached
        return cached

    def _gemini_secret(self) -> GoogleAiApiKey:
        """Return the injected API key (the only secret from get_secrets)."""
        secrets = self.injected_secrets()
        return secrets[0]

    def make_sut(self, sut_definition: SUTDefinition) -> SUT:
        """Create a ``GoogleGenAiSUT`` for the model named in ``sut_definition``.

        Raises:
            ModelNotSupportedError: If the requested model is not among the
                listed model names.
        """
        available = []
        for listed in self.gemini_client().models.list():
            available.append(listed.name.replace("models/", ""))

        wanted = sut_definition.to_dynamic_sut_metadata().model
        if wanted in available:
            return GoogleGenAiSUT(
                sut_definition.dynamic_uid,
                wanted,
                sut_definition.get("reasoning", False),
                self._gemini_secret(),
            )

        raise ModelNotSupportedError(
            f"{wanted} not found in Gemini models. Closest options are {difflib.get_close_matches(wanted, available, cutoff=0.1)}"
        )
2 changes: 0 additions & 2 deletions src/modelgauge/suts/together_sut_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from modelgauge.sut_definition import SUTDefinition
from modelgauge.suts.together_client import TogetherChatSUT


DRIVER_NAME = "together"


Expand All @@ -28,7 +27,6 @@ def get_secrets(self) -> list[InjectSecret]:
return [api_key]

def _find(self, sut_metadata: DynamicSUTMetadata):
model = None
try:
model = sut_metadata.external_model_name().lower()
self.client.chat.completions.create(
Expand Down
61 changes: 61 additions & 0 deletions tests/modelgauge_tests/data/anthropic-model-list.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"data": [
{
"type": "model",
"id": "claude-haiku-4-5-20251001",
"display_name": "Claude Haiku 4.5",
"created_at": "2025-10-15T00:00:00Z"
},
{
"type": "model",
"id": "claude-sonnet-4-5-20250929",
"display_name": "Claude Sonnet 4.5",
"created_at": "2025-09-29T00:00:00Z"
},
{
"type": "model",
"id": "claude-opus-4-1-20250805",
"display_name": "Claude Opus 4.1",
"created_at": "2025-08-05T00:00:00Z"
},
{
"type": "model",
"id": "claude-opus-4-20250514",
"display_name": "Claude Opus 4",
"created_at": "2025-05-22T00:00:00Z"
},
{
"type": "model",
"id": "claude-sonnet-4-20250514",
"display_name": "Claude Sonnet 4",
"created_at": "2025-05-22T00:00:00Z"
},
{
"type": "model",
"id": "claude-3-7-sonnet-20250219",
"display_name": "Claude Sonnet 3.7",
"created_at": "2025-02-24T00:00:00Z"
},
{
"type": "model",
"id": "claude-3-5-haiku-20241022",
"display_name": "Claude Haiku 3.5",
"created_at": "2024-10-22T00:00:00Z"
},
{
"type": "model",
"id": "claude-3-haiku-20240307",
"display_name": "Claude Haiku 3",
"created_at": "2024-03-07T00:00:00Z"
},
{
"type": "model",
"id": "claude-3-opus-20240229",
"display_name": "Claude Opus 3",
"created_at": "2024-02-29T00:00:00Z"
}
],
"has_more": false,
"first_id": "claude-haiku-4-5-20251001",
"last_id": "claude-3-opus-20240229"
}
Loading