-
Notifications
You must be signed in to change notification settings - Fork 12.3k
feat: support local refs and defs in tool input schemas #23357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ use serde::Serialize; | |
| use serde_json::Value as JsonValue; | ||
| use serde_json::json; | ||
| use std::collections::BTreeMap; | ||
| use std::collections::BTreeSet; | ||
|
|
||
| /// Primitive JSON Schema type names we support in tool definitions. | ||
| /// | ||
|
|
@@ -33,6 +34,8 @@ pub enum JsonSchemaType { | |
| /// Generic JSON-Schema subset needed for our tool definitions. | ||
| #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] | ||
| pub struct JsonSchema { | ||
| #[serde(rename = "$ref", skip_serializing_if = "Option::is_none")] | ||
| pub schema_ref: Option<String>, | ||
| #[serde(rename = "type", skip_serializing_if = "Option::is_none")] | ||
| pub schema_type: Option<JsonSchemaType>, | ||
| #[serde(skip_serializing_if = "Option::is_none")] | ||
|
|
@@ -52,6 +55,10 @@ pub struct JsonSchema { | |
| pub additional_properties: Option<AdditionalProperties>, | ||
| #[serde(rename = "anyOf", skip_serializing_if = "Option::is_none")] | ||
| pub any_of: Option<Vec<JsonSchema>>, | ||
| #[serde(rename = "$defs", skip_serializing_if = "Option::is_none")] | ||
| pub defs: Option<BTreeMap<String, JsonSchema>>, | ||
| #[serde(skip_serializing_if = "Option::is_none")] | ||
| pub definitions: Option<BTreeMap<String, JsonSchema>>, | ||
| } | ||
|
|
||
| impl JsonSchema { | ||
|
|
@@ -149,6 +156,7 @@ impl From<JsonSchema> for AdditionalProperties { | |
| pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, serde_json::Error> { | ||
| let mut input_schema = input_schema.clone(); | ||
| sanitize_json_schema(&mut input_schema); | ||
| prune_unreachable_definitions(&mut input_schema); | ||
|
celia-oai marked this conversation as resolved.
|
||
| let schema: JsonSchema = serde_json::from_value(input_schema)?; | ||
| if matches!( | ||
| schema.schema_type, | ||
|
|
@@ -163,6 +171,7 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, s | |
| /// schema representation. This function: | ||
| /// - Ensures every typed schema object has a `"type"` when required. | ||
| /// - Preserves explicit `anyOf`. | ||
| /// - Preserves `$ref` and reachable local `$defs` / `definitions`. | ||
| /// - Collapses `const` into single-value `enum`. | ||
| /// - Fills required child fields for object/array schema types, including | ||
| /// nullable unions, with permissive defaults when absent. | ||
|
|
@@ -200,14 +209,16 @@ fn sanitize_json_schema(value: &mut JsonValue) { | |
| if let Some(value) = map.get_mut("anyOf") { | ||
| sanitize_json_schema(value); | ||
| } | ||
| sanitize_schema_table(map, "$defs"); | ||
| sanitize_schema_table(map, "definitions"); | ||
|
|
||
| if let Some(const_value) = map.remove("const") { | ||
| map.insert("enum".to_string(), JsonValue::Array(vec![const_value])); | ||
| } | ||
|
|
||
| let mut schema_types = normalized_schema_types(map); | ||
|
|
||
| if schema_types.is_empty() && map.contains_key("anyOf") { | ||
| if schema_types.is_empty() && (map.contains_key("$ref") || map.contains_key("anyOf")) { | ||
| return; | ||
| } | ||
|
|
||
|
|
@@ -241,6 +252,29 @@ fn sanitize_json_schema(value: &mut JsonValue) { | |
| } | ||
| } | ||
|
|
||
| /// Sanitize a schema definition table before deserializing into `JsonSchema`. | ||
| /// | ||
| /// Definition tables must be objects. Codex keeps valid definition tables and | ||
| /// recursively applies the same compatibility lowering used for inline schemas, | ||
| /// but drops malformed tables so `strict: false` tool registration degrades | ||
| /// gracefully instead of failing on an unreachable or invalid definition table. | ||
| fn sanitize_schema_table(map: &mut serde_json::Map<String, JsonValue>, key: &str) { | ||
| let should_remove = match map.get_mut(key) { | ||
| Some(JsonValue::Object(definitions)) => { | ||
| for definition in definitions.values_mut() { | ||
| sanitize_json_schema(definition); | ||
| } | ||
| false | ||
| } | ||
| Some(_) => true, | ||
| None => false, | ||
| }; | ||
|
|
||
| if should_remove { | ||
| map.remove(key); | ||
| } | ||
| } | ||
|
|
||
| fn ensure_default_children_for_schema_types( | ||
| map: &mut serde_json::Map<String, JsonValue>, | ||
| schema_types: &[JsonSchemaPrimitiveType], | ||
|
|
@@ -257,6 +291,196 @@ fn ensure_default_children_for_schema_types( | |
| } | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] | ||
| enum DefinitionTable { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this? can we pass the string around like above in
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sg |
||
| Defs, | ||
| Definitions, | ||
| } | ||
|
|
||
| impl DefinitionTable { | ||
| fn key(&self) -> &'static str { | ||
| match self { | ||
| Self::Defs => "$defs", | ||
| Self::Definitions => "definitions", | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] | ||
| struct DefinitionPointer { | ||
| table: DefinitionTable, | ||
| name: String, | ||
| } | ||
|
|
||
| /// Prune unused root definition entries to avoid sending tokens for definitions | ||
| /// the tool schema never references. | ||
| fn prune_unreachable_definitions(value: &mut JsonValue) { | ||
| let reachable = collect_reachable_definitions(value); | ||
| let JsonValue::Object(map) = value else { | ||
| return; | ||
| }; | ||
|
|
||
| prune_schema_table(map, DefinitionTable::Defs, &reachable); | ||
| prune_schema_table(map, DefinitionTable::Definitions, &reachable); | ||
| } | ||
|
|
||
| fn prune_schema_table( | ||
| map: &mut serde_json::Map<String, JsonValue>, | ||
| table: DefinitionTable, | ||
| reachable: &BTreeSet<DefinitionPointer>, | ||
| ) { | ||
| let Some(JsonValue::Object(definitions)) = map.get_mut(table.key()) else { | ||
| return; | ||
| }; | ||
|
|
||
| definitions.retain(|name, _| { | ||
| reachable.contains(&DefinitionPointer { | ||
| table: table.clone(), | ||
| name: name.clone(), | ||
| }) | ||
| }); | ||
|
|
||
| if definitions.is_empty() { | ||
| map.remove(table.key()); | ||
| } | ||
| } | ||
|
|
||
| fn collect_reachable_definitions(value: &JsonValue) -> BTreeSet<DefinitionPointer> { | ||
| let mut reachable = BTreeSet::new(); | ||
| let mut pending = Vec::new(); | ||
|
|
||
| collect_refs_outside_definitions(value, &mut pending); | ||
|
|
||
| while let Some(pointer) = pending.pop() { | ||
| if !reachable.insert(pointer.clone()) { | ||
| continue; | ||
| } | ||
|
|
||
| if let Some(definition) = definition_for_pointer(value, &pointer) { | ||
| collect_refs(definition, &mut pending); | ||
| } | ||
| } | ||
|
|
||
| reachable | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||
| enum RefCollectionContext { | ||
| SchemaObject, | ||
| PropertiesMap, | ||
| } | ||
|
|
||
| fn collect_refs_outside_definitions(value: &JsonValue, refs: &mut Vec<DefinitionPointer>) { | ||
| collect_refs_outside_definitions_in_context(value, refs, RefCollectionContext::SchemaObject); | ||
| } | ||
|
|
||
| fn collect_refs_outside_definitions_in_context( | ||
| value: &JsonValue, | ||
| refs: &mut Vec<DefinitionPointer>, | ||
| context: RefCollectionContext, | ||
| ) { | ||
| match value { | ||
| JsonValue::Array(values) => { | ||
| for value in values { | ||
| collect_refs_outside_definitions_in_context( | ||
| value, | ||
| refs, | ||
| RefCollectionContext::SchemaObject, | ||
| ); | ||
| } | ||
| } | ||
| JsonValue::Object(map) => match context { | ||
| RefCollectionContext::SchemaObject => { | ||
| collect_ref_from_map(map, refs); | ||
| for (key, value) in map { | ||
| if key == "$defs" || key == "definitions" { | ||
| continue; | ||
| } | ||
|
|
||
| let child_context = if key == "properties" { | ||
| RefCollectionContext::PropertiesMap | ||
| } else { | ||
| RefCollectionContext::SchemaObject | ||
| }; | ||
| collect_refs_outside_definitions_in_context(value, refs, child_context); | ||
| } | ||
| } | ||
| RefCollectionContext::PropertiesMap => { | ||
| for value in map.values() { | ||
| collect_refs_outside_definitions_in_context( | ||
| value, | ||
| refs, | ||
| RefCollectionContext::SchemaObject, | ||
| ); | ||
| } | ||
| } | ||
| }, | ||
| _ => {} | ||
| } | ||
| } | ||
|
|
||
| fn collect_refs(value: &JsonValue, refs: &mut Vec<DefinitionPointer>) { | ||
| match value { | ||
| JsonValue::Array(values) => { | ||
| for value in values { | ||
| collect_refs(value, refs); | ||
| } | ||
| } | ||
| JsonValue::Object(map) => { | ||
| collect_ref_from_map(map, refs); | ||
| for value in map.values() { | ||
| collect_refs(value, refs); | ||
| } | ||
| } | ||
| _ => {} | ||
| } | ||
| } | ||
|
|
||
| fn collect_ref_from_map( | ||
| map: &serde_json::Map<String, JsonValue>, | ||
| refs: &mut Vec<DefinitionPointer>, | ||
| ) { | ||
| if let Some(JsonValue::String(schema_ref)) = map.get("$ref") | ||
| && let Some(pointer) = parse_local_definition_ref(schema_ref) | ||
| { | ||
| refs.push(pointer); | ||
| } | ||
| } | ||
|
|
||
| fn definition_for_pointer<'a>( | ||
| value: &'a JsonValue, | ||
| pointer: &DefinitionPointer, | ||
| ) -> Option<&'a JsonValue> { | ||
| let JsonValue::Object(map) = value else { | ||
| return None; | ||
| }; | ||
|
|
||
| map.get(pointer.table.key()) | ||
| .and_then(JsonValue::as_object) | ||
| .and_then(|definitions| definitions.get(&pointer.name)) | ||
| } | ||
|
|
||
| fn parse_local_definition_ref(schema_ref: &str) -> Option<DefinitionPointer> { | ||
| let fragment = schema_ref.strip_prefix('#')?; | ||
| let pointer = urlencoding::decode(fragment).ok()?; | ||
| let pointer = jsonptr::Pointer::parse(pointer.as_ref()).ok()?; | ||
|
|
||
| let (table_token, pointer) = pointer.split_front()?; | ||
| let table = match table_token.decoded().as_ref() { | ||
| "$defs" => DefinitionTable::Defs, | ||
| "definitions" => DefinitionTable::Definitions, | ||
| _ => return None, | ||
| }; | ||
|
|
||
| // Responses API non-strict mode accepts nested local refs such as | ||
| // `#/$defs/User/properties/name`, so keep the parent definition reachable. | ||
| let (name, _) = pointer.split_front()?; | ||
| Some(DefinitionPointer { | ||
| table, | ||
| name: name.decoded().into_owned(), | ||
| }) | ||
| } | ||
|
|
||
| fn normalized_schema_types( | ||
| map: &serde_json::Map<String, JsonValue>, | ||
| ) -> Vec<JsonSchemaPrimitiveType> { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For MCP/dynamic tools, these newly serialized
$defs/definitionsbecome part of the model-visible tool parameters on every request. A connector can return a reachable definition containing a huge enum, description, or nested schema;prune_unreachable_definitionsonly checks reachability and does not impose any byte/token cap, so this can inject >1k or >10k tokens into context. The context-review rules require hard caps for injected items, so please truncate, reject, or otherwise bound preserved definitions before serialization.Useful? React with 👍 / 👎.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe we can address the token cap issue separately as a follow-up, but don't think it's blocking this pr