Skip to content

Commit 7995c66

Browse files
authored
Stream apply_patch changes (#17862)
Adds new events for streaming apply_patch changes from responses api. This is to enable clients to show progress during file writes. Caveat: This does not work with apply_patch in function call mode, since that required adding streaming json parsing.
1 parent 9effa05 commit 7995c66

File tree

20 files changed

+729
-29
lines changed

20 files changed

+729
-29
lines changed

codex-rs/apply-patch/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ use codex_utils_absolute_path::AbsolutePathBuf;
1818
pub use parser::Hunk;
1919
pub use parser::ParseError;
2020
use parser::ParseError::*;
21-
use parser::UpdateFileChunk;
21+
pub use parser::UpdateFileChunk;
2222
pub use parser::parse_patch;
23+
pub use parser::parse_patch_streaming;
2324
use similar::TextDiff;
2425
use thiserror::Error;
2526

codex-rs/apply-patch/src/parser.rs

Lines changed: 233 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,14 @@ pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
132132
parse_patch_text(patch, mode)
133133
}
134134

135+
/// Parses streamed patch text that may not have reached `*** End Patch` yet.
136+
///
137+
/// This entry point is for progress reporting only; callers must not use its
138+
/// output to apply a patch.
139+
pub fn parse_patch_streaming(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
140+
parse_patch_text(patch, ParseMode::Streaming)
141+
}
142+
135143
enum ParseMode {
136144
/// Parse the patch text argument as is.
137145
Strict,
@@ -169,48 +177,71 @@ enum ParseMode {
169177
/// `<<'EOF'` and ends with `EOF\n`. If so, we strip off these markers,
170178
/// trim() the result, and treat what is left as the patch text.
171179
Lenient,
180+
181+
/// Parse partial patch text for progress reporting while the model is
182+
/// still streaming tool input. This mode requires a begin marker but does
183+
/// not require an end marker, and its output must not be used to apply a
184+
/// patch.
185+
Streaming,
172186
}
173187

174188
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
175189
let lines: Vec<&str> = patch.trim().lines().collect();
176-
let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
177-
Ok(()) => &lines,
178-
Err(e) => match mode {
179-
ParseMode::Strict => {
180-
return Err(e);
181-
}
182-
ParseMode::Lenient => check_patch_boundaries_lenient(&lines, e)?,
183-
},
190+
let (patch_lines, hunk_lines) = match mode {
191+
ParseMode::Strict => check_patch_boundaries_strict(&lines)?,
192+
ParseMode::Lenient => check_patch_boundaries_lenient(&lines)?,
193+
ParseMode::Streaming => check_patch_boundaries_streaming(&lines)?,
184194
};
185195

186196
let mut hunks: Vec<Hunk> = Vec::new();
187-
// The above checks ensure that lines.len() >= 2.
188-
let last_line_index = lines.len().saturating_sub(1);
189-
let mut remaining_lines = &lines[1..last_line_index];
197+
let mut remaining_lines = hunk_lines;
190198
let mut line_number = 2;
199+
let allow_incomplete = matches!(mode, ParseMode::Streaming);
191200
while !remaining_lines.is_empty() {
192-
let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
201+
let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number, allow_incomplete)?;
193202
hunks.push(hunk);
194203
line_number += hunk_lines;
195204
remaining_lines = &remaining_lines[hunk_lines..]
196205
}
197-
let patch = lines.join("\n");
206+
let patch = patch_lines.join("\n");
198207
Ok(ApplyPatchArgs {
199208
hunks,
200209
patch,
201210
workdir: None,
202211
})
203212
}
204213

214+
fn check_patch_boundaries_streaming<'a>(
215+
original_lines: &'a [&'a str],
216+
) -> Result<(&'a [&'a str], &'a [&'a str]), ParseError> {
217+
match original_lines {
218+
[first, ..] if first.trim() == BEGIN_PATCH_MARKER => {
219+
let body_lines = if original_lines
220+
.last()
221+
.is_some_and(|line| line.trim() == END_PATCH_MARKER)
222+
{
223+
&original_lines[1..original_lines.len() - 1]
224+
} else {
225+
&original_lines[1..]
226+
};
227+
Ok((original_lines, body_lines))
228+
}
229+
_ => check_patch_boundaries_strict(original_lines),
230+
}
231+
}
232+
205233
/// Checks the start and end lines of the patch text for `apply_patch`,
206234
/// returning an error if they do not match the expected markers.
207-
fn check_patch_boundaries_strict(lines: &[&str]) -> Result<(), ParseError> {
235+
fn check_patch_boundaries_strict<'a>(
236+
lines: &'a [&'a str],
237+
) -> Result<(&'a [&'a str], &'a [&'a str]), ParseError> {
208238
let (first_line, last_line) = match lines {
209239
[] => (None, None),
210240
[first] => (Some(first), Some(first)),
211241
[first, .., last] => (Some(first), Some(last)),
212242
};
213-
check_start_and_end_lines_strict(first_line, last_line)
243+
check_start_and_end_lines_strict(first_line, last_line)?;
244+
Ok((lines, &lines[1..lines.len() - 1]))
214245
}
215246

216247
/// If we are in lenient mode, we check if the first line starts with `<<EOF`
@@ -222,19 +253,20 @@ fn check_patch_boundaries_strict(lines: &[&str]) -> Result<(), ParseError> {
222253
/// contents, excluding the heredoc markers.
223254
fn check_patch_boundaries_lenient<'a>(
224255
original_lines: &'a [&'a str],
225-
original_parse_error: ParseError,
226-
) -> Result<&'a [&'a str], ParseError> {
256+
) -> Result<(&'a [&'a str], &'a [&'a str]), ParseError> {
257+
let original_parse_error = match check_patch_boundaries_strict(original_lines) {
258+
Ok(lines) => return Ok(lines),
259+
Err(e) => e,
260+
};
261+
227262
match original_lines {
228263
[first, .., last] => {
229264
if (first == &"<<EOF" || first == &"<<'EOF'" || first == &"<<\"EOF\"")
230265
&& last.ends_with("EOF")
231266
&& original_lines.len() >= 4
232267
{
233268
let inner_lines = &original_lines[1..original_lines.len() - 1];
234-
match check_patch_boundaries_strict(inner_lines) {
235-
Ok(()) => Ok(inner_lines),
236-
Err(e) => Err(e),
237-
}
269+
check_patch_boundaries_strict(inner_lines)
238270
} else {
239271
Err(original_parse_error)
240272
}
@@ -265,7 +297,11 @@ fn check_start_and_end_lines_strict(
265297

266298
/// Attempts to parse a single hunk from the start of lines.
267299
/// Returns the parsed hunk and the number of lines parsed (or a ParseError).
268-
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
300+
fn parse_one_hunk(
301+
lines: &[&str],
302+
line_number: usize,
303+
allow_incomplete: bool,
304+
) -> Result<(Hunk, usize), ParseError> {
269305
// Be tolerant of case mismatches and extra padding around marker strings.
270306
let first_line = lines[0].trim();
271307
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
@@ -321,15 +357,26 @@ fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), P
321357
continue;
322358
}
323359

324-
if remaining_lines[0].starts_with("***") {
360+
if remaining_lines[0].starts_with('*') {
325361
break;
326362
}
327363

328-
let (chunk, chunk_lines) = parse_update_file_chunk(
364+
if allow_incomplete && remaining_lines[0] == "@" {
365+
break;
366+
}
367+
368+
let parsed_chunk = parse_update_file_chunk(
329369
remaining_lines,
330370
line_number + parsed_lines,
331371
chunks.is_empty(),
332-
)?;
372+
);
373+
let (chunk, chunk_lines) = match parsed_chunk {
374+
Ok(parsed) => parsed,
375+
Err(InvalidHunkError { .. }) if allow_incomplete && !chunks.is_empty() => {
376+
break;
377+
}
378+
Err(err) => return Err(err),
379+
};
333380
chunks.push(chunk);
334381
parsed_lines += chunk_lines;
335382
remaining_lines = &remaining_lines[chunk_lines..]
@@ -453,6 +500,166 @@ fn parse_update_file_chunk(
453500
Ok((chunk, parsed_lines + start_index))
454501
}
455502

503+
#[test]
504+
fn test_parse_patch_streaming() {
505+
assert_eq!(
506+
parse_patch_streaming("*** Begin Patch\n*** Add File: src/hello.txt\n+hello\n+wor"),
507+
Ok(ApplyPatchArgs {
508+
hunks: vec![AddFile {
509+
path: PathBuf::from("src/hello.txt"),
510+
contents: "hello\nwor\n".to_string(),
511+
}],
512+
patch: "*** Begin Patch\n*** Add File: src/hello.txt\n+hello\n+wor".to_string(),
513+
workdir: None,
514+
})
515+
);
516+
517+
assert_eq!(
518+
parse_patch_streaming(
519+
"*** Begin Patch\n*** Update File: src/old.rs\n*** Move to: src/new.rs\n@@\n-old\n+new",
520+
),
521+
Ok(ApplyPatchArgs {
522+
hunks: vec![UpdateFile {
523+
path: PathBuf::from("src/old.rs"),
524+
move_path: Some(PathBuf::from("src/new.rs")),
525+
chunks: vec![UpdateFileChunk {
526+
change_context: None,
527+
old_lines: vec!["old".to_string()],
528+
new_lines: vec!["new".to_string()],
529+
is_end_of_file: false,
530+
}],
531+
}],
532+
patch: "*** Begin Patch\n*** Update File: src/old.rs\n*** Move to: src/new.rs\n@@\n-old\n+new".to_string(),
533+
workdir: None,
534+
})
535+
);
536+
537+
assert!(
538+
parse_patch_text(
539+
"*** Begin Patch\n*** Delete File: gone.txt",
540+
ParseMode::Streaming
541+
)
542+
.is_ok()
543+
);
544+
assert!(
545+
parse_patch_text(
546+
"*** Begin Patch\n*** Delete File: gone.txt",
547+
ParseMode::Strict
548+
)
549+
.is_err()
550+
);
551+
552+
assert_eq!(
553+
parse_patch_streaming(
554+
"*** Begin Patch\n*** Add File: src/one.txt\n+one\n*** Delete File: src/two.txt\n",
555+
),
556+
Ok(ApplyPatchArgs {
557+
hunks: vec![
558+
AddFile {
559+
path: PathBuf::from("src/one.txt"),
560+
contents: "one\n".to_string(),
561+
},
562+
DeleteFile {
563+
path: PathBuf::from("src/two.txt"),
564+
},
565+
],
566+
patch: "*** Begin Patch\n*** Add File: src/one.txt\n+one\n*** Delete File: src/two.txt"
567+
.to_string(),
568+
workdir: None,
569+
})
570+
);
571+
}
572+
573+
#[test]
574+
fn test_parse_patch_streaming_large_patch_by_character() {
575+
let patch = "\
576+
*** Begin Patch
577+
*** Add File: docs/release-notes.md
578+
+# Release notes
579+
+
580+
+## CLI
581+
+- Surface apply_patch progress while arguments stream.
582+
+- Keep final patch application gated on the completed tool call.
583+
+- Include file summaries in the progress event payload.
584+
*** Update File: src/config.rs
585+
@@ impl Config
586+
- pub apply_patch_progress: bool,
587+
+ pub stream_apply_patch_progress: bool,
588+
pub include_diagnostics: bool,
589+
@@ fn default_progress_interval()
590+
- Duration::from_millis(500)
591+
+ Duration::from_millis(250)
592+
*** Delete File: src/legacy_patch_progress.rs
593+
*** Update File: crates/cli/src/main.rs
594+
*** Move to: crates/cli/src/bin/codex.rs
595+
@@ fn run()
596+
- let args = Args::parse();
597+
- dispatch(args)
598+
+ let cli = Cli::parse();
599+
+ dispatch(cli)
600+
*** Add File: tests/fixtures/apply_patch_progress.json
601+
+{
602+
+ \"type\": \"apply_patch_progress\",
603+
+ \"hunks\": [
604+
+ { \"operation\": \"add\", \"path\": \"docs/release-notes.md\" },
605+
+ { \"operation\": \"update\", \"path\": \"src/config.rs\" }
606+
+ ]
607+
+}
608+
*** Update File: README.md
609+
@@ Development workflow
610+
Build the Rust workspace before opening a pull request.
611+
+When touching streamed tool calls, include parser coverage for partial input.
612+
+Prefer tests that exercise the exact event payload shape.
613+
*** Delete File: docs/old-apply-patch-progress.md
614+
*** End Patch";
615+
616+
let mut max_hunk_count = 0;
617+
let mut saw_hunk_counts = Vec::new();
618+
for i in 1..=patch.len() {
619+
let partial = &patch[..i];
620+
if let Ok(parsed) = parse_patch_streaming(partial) {
621+
let hunk_count = parsed.hunks.len();
622+
assert!(
623+
hunk_count >= max_hunk_count,
624+
"hunk count should never decrease while streaming: {hunk_count} < {max_hunk_count} for {partial:?}",
625+
);
626+
if hunk_count > max_hunk_count {
627+
saw_hunk_counts.push(hunk_count);
628+
max_hunk_count = hunk_count;
629+
}
630+
}
631+
}
632+
633+
assert_eq!(saw_hunk_counts, vec![1, 2, 3, 4, 5, 6, 7]);
634+
let parsed = parse_patch_streaming(patch).unwrap();
635+
assert_eq!(parsed.hunks.len(), 7);
636+
assert_eq!(
637+
parsed
638+
.hunks
639+
.iter()
640+
.map(|hunk| match hunk {
641+
AddFile { .. } => "add",
642+
DeleteFile { .. } => "delete",
643+
UpdateFile {
644+
move_path: Some(_), ..
645+
} => "move-update",
646+
UpdateFile {
647+
move_path: None, ..
648+
} => "update",
649+
})
650+
.collect::<Vec<_>>(),
651+
vec![
652+
"add",
653+
"update",
654+
"delete",
655+
"move-update",
656+
"add",
657+
"update",
658+
"delete"
659+
]
660+
);
661+
}
662+
456663
#[test]
457664
fn test_parse_patch() {
458665
assert_eq!(
@@ -794,7 +1001,7 @@ fn test_parse_patch_lenient() {
7941001
#[test]
7951002
fn test_parse_one_hunk() {
7961003
assert_eq!(
797-
parse_one_hunk(&["bad"], /*line_number*/ 234),
1004+
parse_one_hunk(&["bad"], /*line_number*/ 234, /*allow_incomplete*/ false),
7981005
Err(InvalidHunkError {
7991006
message: "'bad' is not a valid hunk header. \
8001007
Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'".to_string(),

0 commit comments

Comments
 (0)