Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 68 additions & 3 deletions crates/forge_app/src/transformers/model_specific_reasoning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,14 @@ impl ModelSpecificReasoning {

fn family(&self) -> AnthropicModelFamily {
let id = self.model_id.to_lowercase();
if id.contains("opus-4-7") || id.contains("47-opus") {
if id.contains("opus-4-8")
|| id.contains("48-opus")
|| id.contains("opus-4-7")
|| id.contains("47-opus")
{
// Opus 4.8 shares Opus 4.7's API contract: adaptive thinking only
// (legacy `budget_tokens` returns 400) and non-default sampling
// params (`temperature`/`top_p`/`top_k`) return 400.
AnthropicModelFamily::AdaptiveOnly
} else if id.contains("opus-4-6")
|| id.contains("46-opus")
Expand Down Expand Up @@ -82,7 +89,7 @@ impl Transformer for ModelSpecificReasoning {
warn!(
model = %self.model_id,
dropped_max_tokens = max_tokens,
"Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking budgets are unsupported. Use `reasoning.effort` to control thinking depth instead."
"Dropping `reasoning.max_tokens` for Opus 4.7/4.8: extended thinking budgets are unsupported. Use `reasoning.effort` to control thinking depth instead."
);
}
context.temperature = None;
Expand All @@ -108,7 +115,7 @@ impl Transformer for ModelSpecificReasoning {
{
warn!(
model = %self.model_id,
"Dropping `reasoning.effort`: the effort parameter is only supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7."
"Dropping `reasoning.effort`: the effort parameter is only supported on Opus 4.5, Opus 4.6, Sonnet 4.6, Opus 4.7, and Opus 4.8."
);
reasoning.effort = None;
}
Expand Down Expand Up @@ -155,6 +162,64 @@ mod tests {
assert_eq!(actual, expected);
}

#[test]
fn test_opus_4_8_drops_max_tokens_and_sampling_params() {
    // Opus 4.8 follows the same API contract as Opus 4.7: a legacy
    // `budget_tokens` value or any non-default sampling parameter is rejected
    // with a 400, so the transformer has to remove both.
    let requested = ReasoningConfig {
        enabled: Some(true),
        max_tokens: Some(8000),
        effort: Some(Effort::XHigh),
        exclude: Some(true),
    };
    // Identical to `requested` except that the thinking budget is cleared.
    let surviving = ReasoningConfig {
        enabled: Some(true),
        max_tokens: None,
        effort: Some(Effort::XHigh),
        exclude: Some(true),
    };

    let fixture = fixture_context_with_sampling().reasoning(requested);
    let actual = ModelSpecificReasoning::new("claude-opus-4-8").transform(fixture);
    let expected = Context::default().reasoning(surviving);

    assert_eq!(actual, expected);
}

#[test]
fn test_opus_4_8_strips_sampling_even_without_reasoning() {
    // No `.reasoning(...)` is attached in this test; the transformed context
    // is expected to equal a default `Context`.
    let sampled = fixture_context_with_sampling();

    let actual = ModelSpecificReasoning::new("claude-opus-4-8").transform(sampled);

    assert_eq!(actual, Context::default());
}

#[test]
fn test_opus_4_8_bedrock_prefix_still_matches() {
    // A Bedrock region prefix (`us.anthropic.claude-...`) in front of the model
    // id must not change classification: the model is still AdaptiveOnly, so
    // sampling params are stripped and `max_tokens` is dropped.
    let requested = ReasoningConfig {
        enabled: Some(true),
        max_tokens: Some(8000),
        effort: Some(Effort::XHigh),
        exclude: None,
    };
    // Identical to `requested` except that the thinking budget is cleared.
    let surviving = ReasoningConfig {
        enabled: Some(true),
        max_tokens: None,
        effort: Some(Effort::XHigh),
        exclude: None,
    };

    let fixture = fixture_context_with_sampling().reasoning(requested);
    let actual = ModelSpecificReasoning::new("us.anthropic.claude-opus-4-8").transform(fixture);
    let expected = Context::default().reasoning(surviving);

    assert_eq!(actual, expected);
}

#[test]
fn test_opus_4_7_strips_sampling_even_without_reasoning() {
let fixture = fixture_context_with_sampling();
Expand Down
15 changes: 10 additions & 5 deletions crates/forge_repo/src/provider/anthropic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,16 @@ impl<H: HttpInfra> Anthropic<H> {
}

/// Returns false when the model auto-enables interleaved thinking through
/// adaptive thinking (Opus 4.7, Opus 4.6, Sonnet 4.6). When the model is
/// unknown (e.g., listing endpoints), the flag is included because it is
/// adaptive thinking (Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6). When the model
/// is unknown (e.g., listing endpoints), the flag is included because it is
/// harmless on non-chat endpoints and necessary on older chat models.
fn interleaved_thinking_required(model: Option<&ModelId>) -> bool {
let Some(model) = model else { return true };
let id = model.as_str().to_lowercase();
!(id.contains("opus-4-7") || id.contains("opus-4-6") || id.contains("sonnet-4-6"))
!(id.contains("opus-4-8")
|| id.contains("opus-4-7")
|| id.contains("opus-4-6")
|| id.contains("sonnet-4-6"))
}

impl<T: HttpInfra> Anthropic<T> {
Expand Down Expand Up @@ -801,8 +804,8 @@ mod tests {

#[test]
fn test_get_headers_drops_interleaved_thinking_for_4_6_plus_models() {
// Adaptive thinking auto-enables interleaved thinking on Opus 4.7,
// Opus 4.6, and Sonnet 4.6; the beta header is redundant there.
// Adaptive thinking auto-enables interleaved thinking on Opus 4.8,
// Opus 4.7, Opus 4.6, and Sonnet 4.6; the beta header is redundant there.
let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap();
let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap();

Expand Down Expand Up @@ -832,9 +835,11 @@ mod tests {
);

for model_id in [
"claude-opus-4-8",
"claude-opus-4-7",
"claude-opus-4-6",
"claude-sonnet-4-6",
"us.anthropic.claude-opus-4-8",
"us.anthropic.claude-opus-4-7",
"global.anthropic.claude-sonnet-4-6",
] {
Expand Down
Loading