From c2fc3173212063024c0334e1aa2d2c36b310d838 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Tue, 9 Dec 2025 16:55:01 -0800 Subject: [PATCH 1/2] Parse rg --- codex-rs/core/src/parse_command.rs | 127 +++++++++++++++++++---------- 1 file changed, 82 insertions(+), 45 deletions(-) diff --git a/codex-rs/core/src/parse_command.rs b/codex-rs/core/src/parse_command.rs index f3353470427..d398e4107ab 100644 --- a/codex-rs/core/src/parse_command.rs +++ b/codex-rs/core/src/parse_command.rs @@ -117,9 +117,6 @@ mod tests { query: None, path: None, }, - ParsedCommand::Unknown { - cmd: "head -n 40".to_string(), - }, ], ); } @@ -143,16 +140,11 @@ mod tests { let inner = "rg -n \"BUG|FIXME|TODO|XXX|HACK\" -S | head -n 200"; assert_parsed( &vec_str(&["bash", "-lc", inner]), - vec![ - ParsedCommand::Search { - cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(), - query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()), - path: None, - }, - ParsedCommand::Unknown { - cmd: "head -n 200".to_string(), - }, - ], + vec![ParsedCommand::Search { + cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(), + query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()), + path: None, + }], ); } @@ -174,16 +166,11 @@ mod tests { let inner = "rg --files | head -n 50"; assert_parsed( &vec_str(&["bash", "-lc", inner]), - vec![ - ParsedCommand::Search { - cmd: "rg --files".to_string(), - query: None, - path: None, - }, - ParsedCommand::Unknown { - cmd: "head -n 50".to_string(), - }, - ], + vec![ParsedCommand::Search { + cmd: "rg --files".to_string(), + query: None, + path: None, + }], ); } @@ -391,6 +378,20 @@ mod tests { ); } + #[test] + fn supports_single_string_script_with_cd_and_pipe() { + assert_parsed( + &vec_str(&[ + r#"cd /Users/pakrym/code/codex && rg -n "codex_api" codex-rs -S | head -n 50"#, + ]), + vec![ParsedCommand::Search { + cmd: "rg -n codex_api codex-rs -S".to_string(), + query: Some("codex_api".to_string()), + path: Some("codex-rs".to_string()), + }], + ); + } + // ---- is_small_formatting_command unit tests ---- #[test] fn small_formatting_always_true_commands() { @@ -408,10 +409,8 @@ mod tests { fn head_behavior() { // No args -> small formatting assert!(is_small_formatting_command(&vec_str(&["head"]))); - // Numeric count only -> not considered small formatting by implementation - assert!(!is_small_formatting_command(&shlex_split_safe( - "head -n 40" - ))); + // Numeric count only -> formatting + assert!(is_small_formatting_command(&shlex_split_safe("head -n 40"))); // With explicit file -> not small formatting assert!(!is_small_formatting_command(&shlex_split_safe( "head -n 40 file.txt" @@ -424,17 +423,15 @@ mod tests { fn tail_behavior() { // No args -> small formatting assert!(is_small_formatting_command(&vec_str(&["tail"]))); - // Numeric with plus offset -> not small formatting - assert!(!is_small_formatting_command(&shlex_split_safe( + // Numeric with plus offset -> formatting + assert!(is_small_formatting_command(&shlex_split_safe( "tail -n +10" ))); assert!(!is_small_formatting_command(&shlex_split_safe( "tail -n +10 file.txt" ))); - // Numeric count - assert!(!is_small_formatting_command(&shlex_split_safe( - "tail -n 30" - ))); + // Numeric count -> formatting + assert!(is_small_formatting_command(&shlex_split_safe("tail -n 30"))); assert!(!is_small_formatting_command(&shlex_split_safe( "tail -n 30 file.txt" ))); @@ -718,16 +715,11 @@ mod tests { let inner = "rg --files | head -n 1"; assert_parsed( &shlex_split_safe(inner), - vec![ - ParsedCommand::Search { - cmd: "rg --files".to_string(), - query: None, - path: None, - }, - ParsedCommand::Unknown { - cmd: "head -n 1".to_string(), - }, - ], + vec![ParsedCommand::Search { + cmd: "rg --files".to_string(), + query: None, + path: None, + }], ); } @@ -940,6 +932,19 @@ pub fn parse_command_impl(command: &[String]) -> Vec { vec![normalized] }; + // If we have a compound/pipelined command, drop small formatting helpers + // (e.g., `head -n 50`) so summaries focus on the primary operation. + // If dropping them would remove everything except `cd`, keep the originals. + let parts = if parts.len() > 1 { + let filtered = drop_small_formatting_commands(parts.clone()); + let has_non_cd = filtered + .iter() + .any(|tokens| tokens.first().is_some_and(|t| t != "cd")); + if has_non_cd { filtered } else { parts } + } else { + parts + }; + // Preserve left-to-right execution order for all commands, including bash -c/-lc // so summaries reflect the order they will run. @@ -1083,6 +1088,14 @@ fn normalize_tokens(cmd: &[String]) -> Vec { { shlex_split(script).unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()]) } + [script] + if script + .chars() + .any(|c| c.is_whitespace() || c == '|' || c == ';' || c == '&') => + { + shlex_split(script) + .unwrap_or_else(|| script.split_whitespace().map(str::to_string).collect()) + } _ => cmd.to_vec(), } } @@ -1384,13 +1397,37 @@ fn is_small_formatting_command(tokens: &[String]) -> bool { // Treat as formatting when no explicit file operand is present. // Common forms: `head -n 40`, `head -c 100`. // Keep cases like `head -n 40 file`. - tokens.len() < 3 + match tokens { + // `head` / `head ` / `head -n50` (we don't parse `head ` elsewhere) + [_] | [_, _] => true, + // `head -n 40` / `head -c 100` (no file operand) + [_, flag, count] + if (flag == "-n" || flag == "-c") + && count.chars().all(|c| c.is_ascii_digit()) => + { + true + } + _ => false, + } } "tail" => { // Treat as formatting when no explicit file operand is present. // Common forms: `tail -n +10`, `tail -n 30`. // Keep cases like `tail -n 30 file`. - tokens.len() < 3 + match tokens { + // `tail` / `tail ` / `tail -n30` / `tail -n+10` + [_] | [_, _] => true, + // `tail -n 30` / `tail -n +10` (no file operand) + [_, flag, count] + if flag == "-n" + && (count.chars().all(|c| c.is_ascii_digit()) + || (count.starts_with('+') + && count[1..].chars().all(|c| c.is_ascii_digit()))) => + { + true + } + _ => false, + } } "sed" => { // Keep `sed -n file` (treated as a file read elsewhere); From 629f54746f2ba46955d6cf77bd6fc019105d3946 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Tue, 9 Dec 2025 17:11:31 -0800 Subject: [PATCH 2/2] parse_command: treat head/tail as reads, refine formatting detection --- codex-rs/core/src/parse_command.rs | 116 ++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 35 deletions(-) diff --git a/codex-rs/core/src/parse_command.rs b/codex-rs/core/src/parse_command.rs index d398e4107ab..399513f5ae0 100644 --- a/codex-rs/core/src/parse_command.rs +++ b/codex-rs/core/src/parse_command.rs @@ -260,6 +260,19 @@ mod tests { ); } + #[test] + fn supports_head_file_only() { + let inner = "head Cargo.toml"; + assert_parsed( + &vec_str(&["bash", "-lc", inner]), + vec![ParsedCommand::Read { + cmd: inner.to_string(), + name: "Cargo.toml".to_string(), + path: PathBuf::from("Cargo.toml"), + }], + ); + } + #[test] fn supports_cat_sed_n() { let inner = "cat tui/Cargo.toml | sed -n '1,200p'"; @@ -300,6 +313,19 @@ mod tests { ); } + #[test] + fn supports_tail_file_only() { + let inner = "tail README.md"; + assert_parsed( + &vec_str(&["bash", "-lc", inner]), + vec![ParsedCommand::Read { + cmd: inner.to_string(), + name: "README.md".to_string(), + path: PathBuf::from("README.md"), + }], + ); + } + #[test] fn supports_npm_run_build_is_unknown() { assert_parsed( @@ -380,10 +406,9 @@ mod tests { #[test] fn supports_single_string_script_with_cd_and_pipe() { + let inner = r#"cd /Users/pakrym/code/codex && rg -n "codex_api" codex-rs -S | head -n 50"#; assert_parsed( - &vec_str(&[ - r#"cd /Users/pakrym/code/codex && rg -n "codex_api" codex-rs -S | head -n 50"#, - ]), + &vec_str(&["bash", "-lc", inner]), vec![ParsedCommand::Search { cmd: "rg -n codex_api codex-rs -S".to_string(), query: Some("codex_api".to_string()), @@ -415,8 +440,10 @@ mod tests { assert!(!is_small_formatting_command(&shlex_split_safe( "head -n 40 file.txt" ))); - // File only (no count) -> treated as small formatting by implementation - assert!(is_small_formatting_command(&vec_str(&["head", "file.txt"]))); + // File only (no count) -> not formatting + assert!(!is_small_formatting_command(&vec_str(&[ + "head", "file.txt" + ]))); } #[test] @@ -435,8 +462,15 @@ mod tests { assert!(!is_small_formatting_command(&shlex_split_safe( "tail -n 30 file.txt" ))); - // File only -> small formatting by implementation - assert!(is_small_formatting_command(&vec_str(&["tail", "file.txt"]))); + // Byte count -> formatting + assert!(is_small_formatting_command(&shlex_split_safe("tail -c 30"))); + assert!(is_small_formatting_command(&shlex_split_safe( + "tail -c +10" + ))); + // File only (no count) -> not formatting + assert!(!is_small_formatting_command(&vec_str(&[ + "tail", "file.txt" + ]))); } #[test] @@ -711,10 +745,10 @@ mod tests { #[test] fn bash_dash_c_pipeline_parsing() { - // Ensure -c is handled similarly to -lc by normalization + // Ensure -c is handled similarly to -lc by shell parsing let inner = "rg --files | head -n 1"; assert_parsed( - &shlex_split_safe(inner), + &vec_str(&["bash", "-c", inner]), vec![ParsedCommand::Search { cmd: "rg --files".to_string(), query: None, @@ -932,19 +966,6 @@ pub fn parse_command_impl(command: &[String]) -> Vec { vec![normalized] }; - // If we have a compound/pipelined command, drop small formatting helpers - // (e.g., `head -n 50`) so summaries focus on the primary operation. - // If dropping them would remove everything except `cd`, keep the originals. - let parts = if parts.len() > 1 { - let filtered = drop_small_formatting_commands(parts.clone()); - let has_non_cd = filtered - .iter() - .any(|tokens| tokens.first().is_some_and(|t| t != "cd")); - if has_non_cd { filtered } else { parts } - } else { - parts - }; - // Preserve left-to-right execution order for all commands, including bash -c/-lc // so summaries reflect the order they will run. @@ -1088,14 +1109,6 @@ fn normalize_tokens(cmd: &[String]) -> Vec { { shlex_split(script).unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()]) } - [script] - if script - .chars() - .any(|c| c.is_whitespace() || c == '|' || c == ';' || c == '&') => - { - shlex_split(script) - .unwrap_or_else(|| script.split_whitespace().map(str::to_string).collect()) - } _ => cmd.to_vec(), } } @@ -1398,8 +1411,10 @@ fn is_small_formatting_command(tokens: &[String]) -> bool { // Common forms: `head -n 40`, `head -c 100`. // Keep cases like `head -n 40 file`. match tokens { - // `head` / `head ` / `head -n50` (we don't parse `head ` elsewhere) - [_] | [_, _] => true, + // `head` + [_] => true, + // `head ` or `head -n50`/`head -c100` + [_, arg] => arg.starts_with('-'), // `head -n 40` / `head -c 100` (no file operand) [_, flag, count] if (flag == "-n" || flag == "-c") @@ -1412,11 +1427,13 @@ fn is_small_formatting_command(tokens: &[String]) -> bool { } "tail" => { // Treat as formatting when no explicit file operand is present. - // Common forms: `tail -n +10`, `tail -n 30`. + // Common forms: `tail -n +10`, `tail -n 30`, `tail -c 100`. // Keep cases like `tail -n 30 file`. match tokens { - // `tail` / `tail ` / `tail -n30` / `tail -n+10` - [_] | [_, _] => true, + // `tail` + [_] => true, + // `tail ` or `tail -n30`/`tail -n+10` + [_, arg] => arg.starts_with('-'), // `tail -n 30` / `tail -n +10` (no file operand) [_, flag, count] if flag == "-n" @@ -1426,6 +1443,15 @@ fn is_small_formatting_command(tokens: &[String]) -> bool { { true } + // `tail -c 100` / `tail -c +10` (no file operand) + [_, flag, count] + if flag == "-c" + && (count.chars().all(|c| c.is_ascii_digit()) + || (count.starts_with('+') + && count[1..].chars().all(|c| c.is_ascii_digit()))) => + { + true + } _ => false, } } @@ -1580,6 +1606,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand { }; } } + if let [path] = tail + && !path.starts_with('-') + { + let name = short_display_path(path); + return ParsedCommand::Read { + cmd: shlex_join(main_cmd), + name, + path: PathBuf::from(path), + }; + } ParsedCommand::Unknown { cmd: shlex_join(main_cmd), } @@ -1624,6 +1660,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand { }; } } + if let [path] = tail + && !path.starts_with('-') + { + let name = short_display_path(path); + return ParsedCommand::Read { + cmd: shlex_join(main_cmd), + name, + path: PathBuf::from(path), + }; + } ParsedCommand::Unknown { cmd: shlex_join(main_cmd), }