diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index c4bdc916ffd4..7e66a5b70397 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -519,6 +519,10 @@ const RESIZED_IMAGE_BYTES_ESTIMATE: i64 = 7373; // Use a direct 32px patch count only for `detail: "original"`; // all other image inputs continue to use `RESIZED_IMAGE_BYTES_ESTIMATE`. const ORIGINAL_IMAGE_PATCH_SIZE: u32 = 32; +// See https://platform.openai.com/docs/guides/images-vision#model-sizing-behavior. +// Keep this hard-coded for now; move it into model capabilities if the patch +// budget starts changing often across model releases. +const ORIGINAL_IMAGE_MAX_PATCHES: usize = 10_000; const ORIGINAL_IMAGE_ESTIMATE_CACHE_SIZE: usize = 32; static ORIGINAL_IMAGE_ESTIMATE_CACHE: LazyLock>> = @@ -621,6 +625,7 @@ fn estimate_original_image_bytes(image_url: &str) -> Option { let patches_high = height.saturating_add(patch_size.saturating_sub(1)) / patch_size; let patch_count = patches_wide.saturating_mul(patches_high); let patch_count = usize::try_from(patch_count).unwrap_or(usize::MAX); + let patch_count = patch_count.min(ORIGINAL_IMAGE_MAX_PATCHES); Some(i64::try_from(approx_bytes_for_tokens(patch_count)).unwrap_or(i64::MAX)) }) } diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index f5c30af5759d..ad67deb544c5 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -26,6 +26,7 @@ use codex_utils_output_truncation::TruncationPolicy; use codex_utils_output_truncation::truncate_text; use image::ImageBuffer; use image::ImageFormat; +use image::Luma; use image::Rgba; use pretty_assertions::assert_eq; use regex_lite::Regex; @@ -1900,6 +1901,38 @@ fn original_detail_images_scale_with_dimensions() { assert_eq!(estimated, expected); } +#[test] +fn original_detail_images_are_capped_at_max_patch_count() { + // 3201x3201 at 32px patches yields 101 * 101 = 10,201 patches, + // which exceeds the original-detail patch budget. + let width = 3201; + let height = 3201; + let image = ImageBuffer::from_pixel(width, height, Luma([12u8])); + let mut bytes = std::io::Cursor::new(Vec::new()); + image + .write_to(&mut bytes, ImageFormat::Png) + .expect("encode png"); + let payload = BASE64_STANDARD.encode(bytes.get_ref()); + let image_url = format!("data:image/png;base64,{payload}"); + let item = ResponseItem::FunctionCallOutput { + call_id: "call-original-capped".to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::InputImage { + image_url, + detail: Some(ImageDetail::Original), + }, + ]), + }; + + let raw_len = serde_json::to_string(&item).unwrap().len() as i64; + let estimated = estimate_response_item_model_visible_bytes(&item); + let capped_original_detail_image_bytes = + i64::try_from(approx_bytes_for_tokens(ORIGINAL_IMAGE_MAX_PATCHES)).unwrap(); + let expected = raw_len - payload.len() as i64 + capped_original_detail_image_bytes; + + assert_eq!(estimated, expected); +} + #[test] fn original_detail_webp_images_scale_with_dimensions() { // Same dimensions as the PNG case above, so the patch-based replacement cost is the same.