Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions codex-rs/core/src/context_manager/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,10 @@ const RESIZED_IMAGE_BYTES_ESTIMATE: i64 = 7373;
// Use a direct 32px patch count only for `detail: "original"`;
// all other image inputs continue to use `RESIZED_IMAGE_BYTES_ESTIMATE`.
const ORIGINAL_IMAGE_PATCH_SIZE: u32 = 32;
// Upper bound on the patch count used when estimating `detail: "original"`
// images: the computed count is clamped to this value before it is converted
// into a byte estimate.
// See https://platform.openai.com/docs/guides/images-vision#model-sizing-behavior.
// Keep this hard-coded for now; move it into model capabilities if the patch
// budget starts changing often across model releases.
const ORIGINAL_IMAGE_MAX_PATCHES: usize = 10_000;
// NOTE(review): presumably the entry capacity of `ORIGINAL_IMAGE_ESTIMATE_CACHE`
// below; confirm against that static's initializer.
const ORIGINAL_IMAGE_ESTIMATE_CACHE_SIZE: usize = 32;

static ORIGINAL_IMAGE_ESTIMATE_CACHE: LazyLock<BlockingLruCache<[u8; 20], Option<i64>>> =
Expand Down Expand Up @@ -621,6 +625,7 @@ fn estimate_original_image_bytes(image_url: &str) -> Option<i64> {
let patches_high = height.saturating_add(patch_size.saturating_sub(1)) / patch_size;
let patch_count = patches_wide.saturating_mul(patches_high);
let patch_count = usize::try_from(patch_count).unwrap_or(usize::MAX);
let patch_count = patch_count.min(ORIGINAL_IMAGE_MAX_PATCHES);
Some(i64::try_from(approx_bytes_for_tokens(patch_count)).unwrap_or(i64::MAX))
})
}
Expand Down
33 changes: 33 additions & 0 deletions codex-rs/core/src/context_manager/history_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use codex_utils_output_truncation::TruncationPolicy;
use codex_utils_output_truncation::truncate_text;
use image::ImageBuffer;
use image::ImageFormat;
use image::Luma;
use image::Rgba;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
Expand Down Expand Up @@ -1900,6 +1901,38 @@ fn original_detail_images_scale_with_dimensions() {
assert_eq!(estimated, expected);
}

#[test]
fn original_detail_images_are_capped_at_max_patch_count() {
    // A 3201x3201 image needs 101 patches of 32px along each side, so the
    // uncapped count is 101 * 101 = 10,201 patches. That is above
    // `ORIGINAL_IMAGE_MAX_PATCHES`, so the estimate must use the cap instead.
    let (width, height) = (3201, 3201);

    // Encode a solid single-channel image as PNG into an in-memory buffer.
    let mut encoded = std::io::Cursor::new(Vec::new());
    ImageBuffer::from_pixel(width, height, Luma([12u8]))
        .write_to(&mut encoded, ImageFormat::Png)
        .expect("encode png");

    // Wrap the PNG in a base64 data URL and attach it as an original-detail
    // image on a function call output.
    let payload = BASE64_STANDARD.encode(encoded.get_ref());
    let item = ResponseItem::FunctionCallOutput {
        call_id: "call-original-capped".to_string(),
        output: FunctionCallOutputPayload::from_content_items(vec![
            FunctionCallOutputContentItem::InputImage {
                image_url: format!("data:image/png;base64,{payload}"),
                detail: Some(ImageDetail::Original),
            },
        ]),
    };

    // The expected value is the serialized item length with the base64
    // payload's length swapped out for the byte estimate of the capped
    // patch count.
    let serialized_len = serde_json::to_string(&item).unwrap().len() as i64;
    let payload_len = payload.len() as i64;
    let capped_image_bytes =
        i64::try_from(approx_bytes_for_tokens(ORIGINAL_IMAGE_MAX_PATCHES)).unwrap();
    let expected = serialized_len - payload_len + capped_image_bytes;

    let estimated = estimate_response_item_model_visible_bytes(&item);

    assert_eq!(estimated, expected);
}

#[test]
fn original_detail_webp_images_scale_with_dimensions() {
// Same dimensions as the PNG case above, so the patch-based replacement cost is the same.
Expand Down
Loading