Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MODULE.bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions codex-rs/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions codex-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ indexmap = "2.12.0"
insta = "1.46.3"
inventory = "0.3.19"
itertools = "0.14.0"
jsonptr = { version = "0.7.1", default-features = false }
jsonwebtoken = "9.3.1"
keyring = { version = "3.6", default-features = false }
landlock = "0.4.4"
Expand Down
2 changes: 2 additions & 0 deletions codex-rs/tools/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ codex-protocol = { workspace = true }
codex-utils-absolute-path = { workspace = true }
codex-utils-pty = { workspace = true }
codex-utils-string = { workspace = true }
jsonptr = { workspace = true }
rmcp = { workspace = true, default-features = false, features = [
"base64",
"macros",
Expand All @@ -26,6 +27,7 @@ serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
urlencoding = { workspace = true }

[dev-dependencies]
pretty_assertions = { workspace = true }
Expand Down
226 changes: 225 additions & 1 deletion codex-rs/tools/src/json_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use serde::Serialize;
use serde_json::Value as JsonValue;
use serde_json::json;
use std::collections::BTreeMap;
use std::collections::BTreeSet;

/// Primitive JSON Schema type names we support in tool definitions.
///
Expand Down Expand Up @@ -33,6 +34,8 @@ pub enum JsonSchemaType {
/// Generic JSON-Schema subset needed for our tool definitions.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct JsonSchema {
#[serde(rename = "$ref", skip_serializing_if = "Option::is_none")]
pub schema_ref: Option<String>,
#[serde(rename = "type", skip_serializing_if = "Option::is_none")]
pub schema_type: Option<JsonSchemaType>,
#[serde(skip_serializing_if = "Option::is_none")]
Expand All @@ -52,6 +55,10 @@ pub struct JsonSchema {
pub additional_properties: Option<AdditionalProperties>,
#[serde(rename = "anyOf", skip_serializing_if = "Option::is_none")]
pub any_of: Option<Vec<JsonSchema>>,
#[serde(rename = "$defs", skip_serializing_if = "Option::is_none")]
pub defs: Option<BTreeMap<String, JsonSchema>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub definitions: Option<BTreeMap<String, JsonSchema>>,
Comment on lines +58 to +61
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P0 Badge Cap preserved schema definitions before exposing them

For MCP/dynamic tools, these newly serialized $defs/definitions become part of the model-visible tool parameters on every request. A connector can return a reachable definition containing a huge enum, description, or nested schema; prune_unreachable_definitions only checks reachability and does not impose any byte/token cap, so this can inject >1k or >10k tokens into context. The context-review rules require hard caps for injected items, so please truncate, reject, or otherwise bound preserved definitions before serialization.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Screenshot 2026-05-19 at 1 47 04 PM I did a pass over the token-count increase caused by this change, and it's not too bad. I think we can leave it as-is for now.

Copy link
Copy Markdown
Collaborator Author

@celia-oai celia-oai May 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can address the token cap issue separately as a follow-up, but I don't think it's blocking this PR.

}

impl JsonSchema {
Expand Down Expand Up @@ -149,6 +156,7 @@ impl From<JsonSchema> for AdditionalProperties {
pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, serde_json::Error> {
let mut input_schema = input_schema.clone();
sanitize_json_schema(&mut input_schema);
prune_unreachable_definitions(&mut input_schema);
Comment thread
celia-oai marked this conversation as resolved.
let schema: JsonSchema = serde_json::from_value(input_schema)?;
if matches!(
schema.schema_type,
Expand All @@ -163,6 +171,7 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, s
/// schema representation. This function:
/// - Ensures every typed schema object has a `"type"` when required.
/// - Preserves explicit `anyOf`.
/// - Preserves `$ref` and reachable local `$defs` / `definitions`.
/// - Collapses `const` into single-value `enum`.
/// - Fills required child fields for object/array schema types, including
/// nullable unions, with permissive defaults when absent.
Expand Down Expand Up @@ -200,14 +209,16 @@ fn sanitize_json_schema(value: &mut JsonValue) {
if let Some(value) = map.get_mut("anyOf") {
sanitize_json_schema(value);
}
sanitize_schema_table(map, "$defs");
sanitize_schema_table(map, "definitions");

if let Some(const_value) = map.remove("const") {
map.insert("enum".to_string(), JsonValue::Array(vec![const_value]));
}

let mut schema_types = normalized_schema_types(map);

if schema_types.is_empty() && map.contains_key("anyOf") {
if schema_types.is_empty() && (map.contains_key("$ref") || map.contains_key("anyOf")) {
return;
}

Expand Down Expand Up @@ -241,6 +252,29 @@ fn sanitize_json_schema(value: &mut JsonValue) {
}
}

/// Sanitize a schema definition table before deserializing into `JsonSchema`.
///
/// Definition tables must be objects. Codex keeps valid definition tables and
/// recursively applies the same compatibility lowering used for inline schemas,
/// but drops malformed tables so `strict: false` tool registration degrades
/// gracefully instead of failing on an unreachable or invalid definition table.
fn sanitize_schema_table(map: &mut serde_json::Map<String, JsonValue>, key: &str) {
let should_remove = match map.get_mut(key) {
Some(JsonValue::Object(definitions)) => {
for definition in definitions.values_mut() {
sanitize_json_schema(definition);
}
false
}
Some(_) => true,
None => false,
};

if should_remove {
map.remove(key);
}
}

fn ensure_default_children_for_schema_types(
map: &mut serde_json::Map<String, JsonValue>,
schema_types: &[JsonSchemaPrimitiveType],
Expand All @@ -257,6 +291,196 @@ fn ensure_default_children_for_schema_types(
}
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum DefinitionTable {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this? Can we pass the string around instead, as is done above in sanitize_schema_table(map, "$defs")?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sg

Defs,
Definitions,
}

impl DefinitionTable {
fn key(&self) -> &'static str {
match self {
Self::Defs => "$defs",
Self::Definitions => "definitions",
}
}
}

/// Identifies a single entry in one of the root-level definition tables.
///
/// Values are stored in a `BTreeSet` while computing reachability, which is
/// why the type derives `Ord`; the derived ordering compares `table` first and
/// then `name`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct DefinitionPointer {
/// Which root table (`$defs` or `definitions`) holds the entry.
table: DefinitionTable,
/// Name of the entry, i.e. its key inside that table.
name: String,
}

/// Removes root-level `$defs` / `definitions` entries that no `$ref` reaches,
/// so unreferenced definitions do not cost tokens in the serialized tool
/// schema.
///
/// Non-object values have no definition tables and are left untouched.
fn prune_unreachable_definitions(value: &mut JsonValue) {
    // Reachability is computed on the unmodified document before any table is
    // mutated.
    let reachable = collect_reachable_definitions(value);

    if let JsonValue::Object(root) = value {
        for table in [DefinitionTable::Defs, DefinitionTable::Definitions] {
            prune_schema_table(root, table, &reachable);
        }
    }
}

/// Filters one definition table of `map` down to the entries in `reachable`.
///
/// Does nothing when the table is absent or is not a JSON object. If
/// filtering leaves the table empty, the table key itself is removed from
/// `map`.
fn prune_schema_table(
    map: &mut serde_json::Map<String, JsonValue>,
    table: DefinitionTable,
    reachable: &BTreeSet<DefinitionPointer>,
) {
    let table_key = table.key();
    let Some(entries) = map
        .get_mut(table_key)
        .and_then(JsonValue::as_object_mut)
    else {
        return;
    };

    // One lookup key is reused for every entry; only its name is refreshed.
    let mut probe = DefinitionPointer {
        table,
        name: String::new(),
    };
    entries.retain(|entry_name, _| {
        probe.name.clone_from(entry_name);
        reachable.contains(&probe)
    });

    let table_is_empty = entries.is_empty();
    if table_is_empty {
        map.remove(table_key);
    }
}

/// Computes the set of root definition entries reachable from the schema.
///
/// The search is seeded with every local definition `$ref` found outside the
/// definition tables, then follows the `$ref`s inside each newly reached
/// definition until no new entries turn up. Pointers naming an entry that
/// does not exist are still recorded; they simply contribute no further refs.
fn collect_reachable_definitions(value: &JsonValue) -> BTreeSet<DefinitionPointer> {
    let mut worklist = Vec::new();
    collect_refs_outside_definitions(value, &mut worklist);

    let mut visited = BTreeSet::new();
    while let Some(next) = worklist.pop() {
        // Each definition is expanded once, which also terminates the walk on
        // self-referential or mutually recursive definitions.
        if visited.contains(&next) {
            continue;
        }

        if let Some(definition) = definition_for_pointer(value, &next) {
            collect_refs(definition, &mut worklist);
        }
        visited.insert(next);
    }

    visited
}

/// Describes how a JSON object should be interpreted while collecting `$ref`s
/// from the parts of the schema that are not pruned.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RefCollectionContext {
    /// The root schema object. Its `$defs` / `definitions` tables are the ones
    /// subject to pruning, so they are skipped here and only walked once an
    /// entry is known to be reachable.
    RootSchema,
    /// A nested schema object: its own `$ref` counts and all of its keywords
    /// are traversed.
    SchemaObject,
    /// A map from caller-chosen names to schemas (`properties`, or a nested
    /// definition table). Its keys are names, not schema keywords, so they
    /// must not be interpreted as `$ref` / `$defs` / `definitions`.
    PropertiesMap,
}

/// Collects every local definition `$ref` that appears outside the root
/// `$defs` / `definitions` tables of `value`, appending them to `refs`.
fn collect_refs_outside_definitions(value: &JsonValue, refs: &mut Vec<DefinitionPointer>) {
    collect_refs_outside_definitions_in_context(value, refs, RefCollectionContext::RootSchema);
}

/// Recursive worker for `collect_refs_outside_definitions`.
///
/// Only the root definition tables are skipped. Definition tables nested
/// inside other schemas are never pruned, so they stay in the output; the
/// `$ref`s they contain are therefore collected, otherwise a root definition
/// referenced only from such a nested table would be pruned and leave a
/// dangling reference.
fn collect_refs_outside_definitions_in_context(
    value: &JsonValue,
    refs: &mut Vec<DefinitionPointer>,
    context: RefCollectionContext,
) {
    match value {
        JsonValue::Array(values) => {
            for value in values {
                collect_refs_outside_definitions_in_context(
                    value,
                    refs,
                    RefCollectionContext::SchemaObject,
                );
            }
        }
        JsonValue::Object(map) => match context {
            RefCollectionContext::RootSchema | RefCollectionContext::SchemaObject => {
                let is_root = context == RefCollectionContext::RootSchema;
                collect_ref_from_map(map, refs);
                for (key, value) in map {
                    let is_definition_table = key == "$defs" || key == "definitions";
                    if is_root && is_definition_table {
                        // Root tables are handled by the reachability walk.
                        continue;
                    }

                    // `properties` and nested definition tables map arbitrary
                    // names to schemas, so their keys are not keywords.
                    let child_context = if key == "properties" || is_definition_table {
                        RefCollectionContext::PropertiesMap
                    } else {
                        RefCollectionContext::SchemaObject
                    };
                    collect_refs_outside_definitions_in_context(value, refs, child_context);
                }
            }
            RefCollectionContext::PropertiesMap => {
                for value in map.values() {
                    collect_refs_outside_definitions_in_context(
                        value,
                        refs,
                        RefCollectionContext::SchemaObject,
                    );
                }
            }
        },
        _ => {}
    }
}

/// Appends to `refs` every local definition `$ref` found anywhere inside
/// `value`, descending through all nested objects and arrays without
/// skipping any key.
fn collect_refs(value: &JsonValue, refs: &mut Vec<DefinitionPointer>) {
    match value {
        JsonValue::Object(fields) => {
            // Record this object's own `$ref` before visiting its children.
            collect_ref_from_map(fields, refs);
            fields.values().for_each(|child| collect_refs(child, refs));
        }
        JsonValue::Array(items) => {
            items.iter().for_each(|item| collect_refs(item, refs));
        }
        // Scalars cannot contain a `$ref`.
        JsonValue::Null | JsonValue::Bool(_) | JsonValue::Number(_) | JsonValue::String(_) => {}
    }
}

fn collect_ref_from_map(
map: &serde_json::Map<String, JsonValue>,
refs: &mut Vec<DefinitionPointer>,
) {
if let Some(JsonValue::String(schema_ref)) = map.get("$ref")
&& let Some(pointer) = parse_local_definition_ref(schema_ref)
{
refs.push(pointer);
}
}

fn definition_for_pointer<'a>(
value: &'a JsonValue,
pointer: &DefinitionPointer,
) -> Option<&'a JsonValue> {
let JsonValue::Object(map) = value else {
return None;
};

map.get(pointer.table.key())
.and_then(JsonValue::as_object)
.and_then(|definitions| definitions.get(&pointer.name))
}

/// Parses a `$ref` string into the root definition entry it targets.
///
/// Only same-document references (`#...`) whose JSON Pointer starts with
/// `$defs` or `definitions` followed by an entry name are recognized. The
/// fragment is percent-decoded before being parsed as a JSON Pointer. Any
/// other reference, or one that fails to decode or parse, yields `None`.
fn parse_local_definition_ref(schema_ref: &str) -> Option<DefinitionPointer> {
    let encoded_fragment = schema_ref.strip_prefix('#')?;
    let decoded_fragment = urlencoding::decode(encoded_fragment).ok()?;
    let full_pointer = jsonptr::Pointer::parse(decoded_fragment.as_ref()).ok()?;

    let (table_token, after_table) = full_pointer.split_front()?;
    let table_key = table_token.decoded();
    let table = match &*table_key {
        "$defs" => DefinitionTable::Defs,
        "definitions" => DefinitionTable::Definitions,
        _ => return None,
    };

    // Responses API non-strict mode accepts nested local refs such as
    // `#/$defs/User/properties/name`; only the entry name matters here, so the
    // remaining tokens are ignored and the parent definition stays reachable.
    let (name_token, _) = after_table.split_front()?;
    let name = name_token.decoded().into_owned();

    Some(DefinitionPointer { table, name })
}

fn normalized_schema_types(
map: &serde_json::Map<String, JsonValue>,
) -> Vec<JsonSchemaPrimitiveType> {
Expand Down
Loading
Loading