Skip to content
2 changes: 1 addition & 1 deletion codex-rs/core/src/client_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
}

fn is_shell_tool_name(name: &str) -> bool {
matches!(name, "shell" | "container.exec" | "shell_command")
matches!(name, "shell" | "container.exec")
}

#[derive(Deserialize)]
Expand Down
2 changes: 1 addition & 1 deletion codex-rs/core/src/model_family.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
slug, "gpt-5",
supports_reasoning_summaries: true,
needs_special_apply_patch_instructions: true,
shell_type: ConfigShellToolType::ShellCommand,
shell_type: ConfigShellToolType::Default,
support_verbosity: true,
truncation_policy: TruncationPolicy::Bytes(10_000),
)
Expand Down
23 changes: 20 additions & 3 deletions codex-rs/core/src/tools/events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ pub(crate) enum ToolEmitter {
cwd: PathBuf,
source: ExecCommandSource,
parsed_cmd: Vec<ParsedCommand>,
freeform: bool,
},
ApplyPatch {
changes: HashMap<PathBuf, FileChange>,
Expand All @@ -103,13 +104,19 @@ pub(crate) enum ToolEmitter {
}

impl ToolEmitter {
pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
pub fn shell(
command: Vec<String>,
cwd: PathBuf,
source: ExecCommandSource,
freeform: bool,
) -> Self {
let parsed_cmd = parse_command(&command);
Self::Shell {
command,
cwd,
source,
parsed_cmd,
freeform,
}
}

Expand Down Expand Up @@ -144,6 +151,7 @@ impl ToolEmitter {
cwd,
source,
parsed_cmd,
..
},
stage,
) => {
Expand Down Expand Up @@ -234,14 +242,23 @@ impl ToolEmitter {
self.emit(ctx, ToolEventStage::Begin).await;
}

fn format_exec_output_for_model(&self, output: &ExecToolCallOutput) -> String {
match self {
Self::Shell { freeform: true, .. } => {
super::format_exec_output_for_model_freeform(output)
}
_ => super::format_exec_output_for_model_structured(output),
}
}

pub async fn finish(
&self,
ctx: ToolEventCtx<'_>,
out: Result<ExecToolCallOutput, ToolError>,
) -> Result<String, FunctionCallError> {
let (event, result) = match out {
Ok(output) => {
let content = super::format_exec_output_for_model(&output);
let content = self.format_exec_output_for_model(&output);
let exit_code = output.exit_code;
let event = ToolEventStage::Success(output);
let result = if exit_code == 0 {
Expand All @@ -253,7 +270,7 @@ impl ToolEmitter {
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
let response = super::format_exec_output_for_model(&output);
let response = self.format_exec_output_for_model(&output);
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
let result = Err(FunctionCallError::RespondToModel(response));
(event, result)
Expand Down
12 changes: 10 additions & 2 deletions codex-rs/core/src/tools/handlers/shell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
false,
)
.await
}
Expand All @@ -129,6 +130,7 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
false,
)
.await
}
Expand Down Expand Up @@ -176,6 +178,7 @@ impl ToolHandler for ShellCommandHandler {
turn,
tracker,
call_id,
true,
)
.await
}
Expand All @@ -189,6 +192,7 @@ impl ShellHandler {
turn: Arc<TurnContext>,
tracker: crate::tools::context::SharedTurnDiffTracker,
call_id: String,
freeform: bool,
) -> Result<ToolOutput, FunctionCallError> {
// Approval policy guard for explicit escalation in non-OnRequest modes.
if exec_params.with_escalated_permissions.unwrap_or(false)
Expand Down Expand Up @@ -282,8 +286,12 @@ impl ShellHandler {
}

let source = ExecCommandSource::Agent;
let emitter =
ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone(), source);
let emitter = ToolEmitter::shell(
exec_params.command.clone(),
exec_params.cwd.clone(),
source,
freeform,
);
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
emitter.begin(event_ctx).await;

Expand Down
30 changes: 29 additions & 1 deletion codex-rs/core/src/tools/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub mod spec;

use crate::context_manager::truncate_with_line_bytes_budget;
use crate::exec::ExecToolCallOutput;
use crate::truncate::truncate_formatted_exec_output;
pub use router::ToolRouter;
use serde::Serialize;

Expand All @@ -25,7 +26,7 @@ const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;

/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
exit_code,
duration,
Expand Down Expand Up @@ -61,6 +62,33 @@ pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String
serde_json::to_string(&payload).expect("serialize ExecOutput")
}

pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -> String {
// round to 1 decimal place
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;

let total_lines = exec_output.aggregated_output.text.lines().count();

let formatted_output = truncate_formatted_exec_output(
&exec_output.aggregated_output.text,
total_lines,
SHELL_OUTPUT_MAX_BYTES,
256, // TODO: to be removed
);

let mut sections = Vec::new();

sections.push(format!("Exit code: {}", exec_output.exit_code));
sections.push(format!("Wall time: {duration_seconds} seconds"));
if total_lines != formatted_output.lines().count() {
sections.push(format!("Total output lines: {total_lines}"));
}

sections.push("Output:".to_string());
sections.push(formatted_output);

sections.join("\n")
}

pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
aggregated_output, ..
Expand Down
16 changes: 16 additions & 0 deletions codex-rs/core/src/tools/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,22 @@ mod tests {
);
}

#[test]
fn test_gpt_5_defaults() {
assert_model_tools(
"gpt-5",
&Features::with_defaults(),
&[
"shell",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"view_image",
],
);
}

#[test]
fn test_gpt_5_1_defaults() {
assert_model_tools(
Expand Down
2 changes: 1 addition & 1 deletion codex-rs/core/src/truncate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour
out
}

fn truncate_formatted_exec_output(
pub(crate) fn truncate_formatted_exec_output(
content: &str,
total_lines: usize,
limit_bytes: usize,
Expand Down
26 changes: 13 additions & 13 deletions codex-rs/core/tests/suite/model_tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,19 +102,6 @@ async fn model_selects_expected_tools() {
"codex-mini-latest should expose the local shell tool",
);

let o3_tools = collect_tool_identifiers_for_model("o3").await;
assert_eq!(
o3_tools,
vec![
"shell".to_string(),
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string()
],
"o3 should expose the generic shell tool",
);

let gpt5_codex_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
assert_eq!(
gpt5_codex_tools,
Expand Down Expand Up @@ -143,6 +130,19 @@ async fn model_selects_expected_tools() {
"gpt-5.1-codex should expose the apply_patch tool",
);

let gpt5_tools = collect_tool_identifiers_for_model("gpt-5").await;
assert_eq!(
gpt5_tools,
vec![
"shell".to_string(),
"list_mcp_resources".to_string(),
"list_mcp_resource_templates".to_string(),
"read_mcp_resource".to_string(),
"update_plan".to_string(),
],
"gpt-5 should expose the apply_patch tool",
);

let gpt51_tools = collect_tool_identifiers_for_model("gpt-5.1").await;
assert_eq!(
gpt51_tools,
Expand Down
71 changes: 11 additions & 60 deletions codex-rs/core/tests/suite/prompt_caching.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#![allow(clippy::unwrap_used)]

use codex_core::config::OPENAI_DEFAULT_MODEL;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
Expand All @@ -19,7 +18,6 @@ use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use std::collections::HashMap;
use tempfile::TempDir;

fn text_user_input(text: String) -> serde_json::Value {
Expand Down Expand Up @@ -156,62 +154,15 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

// our internal implementation is responsible for keeping tools in sync
// with the OpenAI schema, so we just verify the tool presence here
let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
(
"gpt-5.1",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
],
),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we still keep the variations here with gpt-5 to validate old behavior?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they are never used

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only current model is being picked from the list

(
"arcticfox",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
],
),
(
"gpt-5.1-codex",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
],
),
(
"gpt-5.1-codex",
vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
],
),
]);
let expected_tools_names = tools_by_model
.get(OPENAI_DEFAULT_MODEL)
.unwrap_or_else(|| panic!("expected tools to be defined for model {OPENAI_DEFAULT_MODEL}"))
.as_slice();
let expected_tools_names = vec![
"shell_command",
"list_mcp_resources",
"list_mcp_resource_templates",
"read_mcp_resource",
"update_plan",
"apply_patch",
"view_image",
];
let body0 = req1.single_request().body_json();

let expected_instructions = if expected_tools_names.contains(&"apply_patch") {
Expand All @@ -228,14 +179,14 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
body0["instructions"],
serde_json::json!(expected_instructions),
);
assert_tool_names(&body0, expected_tools_names);
assert_tool_names(&body0, &expected_tools_names);

let body1 = req2.single_request().body_json();
assert_eq!(
body1["instructions"],
serde_json::json!(expected_instructions),
);
assert_tool_names(&body1, expected_tools_names);
assert_tool_names(&body1, &expected_tools_names);

Ok(())
}
Expand Down
Loading
Loading