diff --git a/crates/forge_app/src/transformers/model_specific_reasoning.rs b/crates/forge_app/src/transformers/model_specific_reasoning.rs index 64a192afc6..8a98030adb 100644 --- a/crates/forge_app/src/transformers/model_specific_reasoning.rs +++ b/crates/forge_app/src/transformers/model_specific_reasoning.rs @@ -27,7 +27,14 @@ impl ModelSpecificReasoning { fn family(&self) -> AnthropicModelFamily { let id = self.model_id.to_lowercase(); - if id.contains("opus-4-7") || id.contains("47-opus") { + if id.contains("opus-4-8") + || id.contains("48-opus") + || id.contains("opus-4-7") + || id.contains("47-opus") + { + // Opus 4.8 shares Opus 4.7's API contract: adaptive thinking only + // (legacy `budget_tokens` returns 400) and non-default sampling + // params (`temperature`/`top_p`/`top_k`) return 400. AnthropicModelFamily::AdaptiveOnly } else if id.contains("opus-4-6") || id.contains("46-opus") @@ -82,7 +89,7 @@ impl Transformer for ModelSpecificReasoning { warn!( model = %self.model_id, dropped_max_tokens = max_tokens, - "Dropping `reasoning.max_tokens` for Opus 4.7: extended thinking budgets are unsupported. Use `reasoning.effort` to control thinking depth instead." + "Dropping `reasoning.max_tokens` for Opus 4.7/4.8: extended thinking budgets are unsupported. Use `reasoning.effort` to control thinking depth instead." ); } context.temperature = None; @@ -108,7 +115,7 @@ impl Transformer for ModelSpecificReasoning { { warn!( model = %self.model_id, - "Dropping `reasoning.effort`: the effort parameter is only supported on Opus 4.5, Opus 4.6, Sonnet 4.6, and Opus 4.7." + "Dropping `reasoning.effort`: the effort parameter is only supported on Opus 4.5, Opus 4.6, Sonnet 4.6, Opus 4.7, and Opus 4.8." ); reasoning.effort = None; } @@ -155,6 +162,64 @@ mod tests { assert_eq!(actual, expected); } + #[test] + fn test_opus_4_8_drops_max_tokens_and_sampling_params() { + // Opus 4.8 shares Opus 4.7's API contract: legacy `budget_tokens` and + // non-default sampling params both return 400, so they must be dropped. + let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: Some(true), + }); + + let actual = ModelSpecificReasoning::new("claude-opus-4-8").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: Some(true), + }); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_8_strips_sampling_even_without_reasoning() { + let fixture = fixture_context_with_sampling(); + + let actual = ModelSpecificReasoning::new("claude-opus-4-8").transform(fixture); + + let expected = Context::default(); + + assert_eq!(actual, expected); + } + + #[test] + fn test_opus_4_8_bedrock_prefix_still_matches() { + // Bedrock region prefixes (`us.anthropic.claude-...`) must still be + // classified as AdaptiveOnly so sampling params are stripped and + // `max_tokens` is dropped. + let fixture = fixture_context_with_sampling().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: Some(8000), + effort: Some(Effort::XHigh), + exclude: None, + }); + + let actual = ModelSpecificReasoning::new("us.anthropic.claude-opus-4-8").transform(fixture); + + let expected = Context::default().reasoning(ReasoningConfig { + enabled: Some(true), + max_tokens: None, + effort: Some(Effort::XHigh), + exclude: None, + }); + + assert_eq!(actual, expected); + } + #[test] fn test_opus_4_7_strips_sampling_even_without_reasoning() { let fixture = fixture_context_with_sampling(); diff --git a/crates/forge_repo/src/provider/anthropic.rs b/crates/forge_repo/src/provider/anthropic.rs index c9449d1bca..d8c004b5b2 100644 --- a/crates/forge_repo/src/provider/anthropic.rs +++ b/crates/forge_repo/src/provider/anthropic.rs @@ -92,13 +92,16 @@ impl Anthropic { } /// Returns false when the model auto-enables interleaved thinking through -/// adaptive thinking (Opus 4.7, Opus 4.6, Sonnet 4.6). When the model is -/// unknown (e.g., listing endpoints), the flag is included because it is +/// adaptive thinking (Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6). When the model +/// is unknown (e.g., listing endpoints), the flag is included because it is /// harmless on non-chat endpoints and necessary on older chat models. fn interleaved_thinking_required(model: Option<&ModelId>) -> bool { let Some(model) = model else { return true }; let id = model.as_str().to_lowercase(); - !(id.contains("opus-4-7") || id.contains("opus-4-6") || id.contains("sonnet-4-6")) + !(id.contains("opus-4-8") + || id.contains("opus-4-7") + || id.contains("opus-4-6") + || id.contains("sonnet-4-6")) } impl Anthropic { @@ -801,8 +804,8 @@ mod tests { #[test] fn test_get_headers_drops_interleaved_thinking_for_4_6_plus_models() { - // Adaptive thinking auto-enables interleaved thinking on Opus 4.7, - // Opus 4.6, and Sonnet 4.6; the beta header is redundant there. + // Adaptive thinking auto-enables interleaved thinking on Opus 4.8, + // Opus 4.7, Opus 4.6, and Sonnet 4.6; the beta header is redundant there. let chat_url = Url::parse("https://api.anthropic.com/v1/messages").unwrap(); let model_url = Url::parse("https://api.anthropic.com/v1/models").unwrap(); @@ -832,9 +835,11 @@ mod tests { ); for model_id in [ + "claude-opus-4-8", "claude-opus-4-7", "claude-opus-4-6", "claude-sonnet-4-6", + "us.anthropic.claude-opus-4-8", "us.anthropic.claude-opus-4-7", "global.anthropic.claude-sonnet-4-6", ] {