18 changes: 1 addition & 17 deletions codex-rs/core/src/codex.rs
@@ -1819,7 +1819,6 @@ pub(crate) async fn run_task(
    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
    // many turns, from the perspective of the user, it is a single turn.
    let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
-    let mut auto_compact_recently_attempted = false;

    loop {
        // Note that pending_input would be something like a message the user
@@ -1874,27 +1873,12 @@ pub(crate) async fn run_task(
        let (responses, items_to_record_in_conversation_history) =
            process_items(processed_items, &sess, &turn_context).await;

+        // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
        if token_limit_reached {
-            if auto_compact_recently_attempted {
-                let limit_str = limit.to_string();
-                let current_tokens = total_usage_tokens
-                    .map(|tokens| tokens.to_string())
-                    .unwrap_or_else(|| "unknown".to_string());
-                let event = EventMsg::Error(ErrorEvent {
-                    message: format!(
-                        "Conversation is still above the token limit after automatic summarization (limit {limit_str}, current {current_tokens}). Please start a new session or trim your input."
-                    ),
-                });
-                sess.send_event(&turn_context, event).await;
-                break;
-            }
-            auto_compact_recently_attempted = true;
            compact::run_inline_auto_compact_task(sess.clone(), turn_context.clone()).await;
            continue;
        }

-        auto_compact_recently_attempted = false;
-
        if responses.is_empty() {
            last_agent_message = get_last_assistant_message_from_turn(
                &items_to_record_in_conversation_history,
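
The net effect of this first file's change is that run_task no longer tracks whether auto-compaction was already attempted: whenever a turn reports that the token limit was hit, it compacts inline and retries, relying on compaction to land well below the limit. A minimal, self-contained sketch of that loop shape, using hypothetical stand-ins (drive_task_sketch, compact_history) rather than the real codex-rs APIs:

// Sketch only: the retry-without-guard shape, not the real codex-rs task loop.
fn drive_task_sketch(limit: u64) {
    let mut used_tokens: u64 = 9_000;
    loop {
        if used_tokens > limit {
            // Always compact and retry; no "already tried once" guard remains.
            used_tokens = compact_history(used_tokens); // assumed to land well below `limit`
            continue;
        }
        // ... process the turn's responses within budget ...
        break;
    }
}

// Hypothetical stand-in for compact::run_inline_auto_compact_task.
fn compact_history(used_tokens: u64) -> u64 {
    used_tokens / 10
}

fn main() {
    drive_task_sketch(8_000);
}
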
17 changes: 15 additions & 2 deletions codex-rs/core/src/compact.rs
@@ -26,6 +26,7 @@ use futures::prelude::*;
use tracing::error;

pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt.md");
+pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;

pub(crate) async fn run_inline_auto_compact_task(
@@ -140,7 +141,9 @@ async fn run_compact_task_inner(
    }

    let history_snapshot = sess.clone_history().await.get_history();
-    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
+    let summary_suffix =
+        get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
+    let summary_text = format!("{SUMMARY_PREFIX}\n{summary_suffix}");
    let user_messages = collect_user_messages(&history_snapshot);

    let initial_context = sess.build_initial_context(turn_context.as_ref());
@@ -201,12 +204,22 @@ pub(crate) fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
    items
        .iter()
        .filter_map(|item| match crate::event_mapping::parse_turn_item(item) {
-            Some(TurnItem::UserMessage(user)) => Some(user.message()),
+            Some(TurnItem::UserMessage(user)) => {
+                if is_summary_message(&user.message()) {
+                    None
+                } else {
+                    Some(user.message())
+                }
+            }
            _ => None,
        })
        .collect()
}

+pub(crate) fn is_summary_message(message: &str) -> bool {
+    message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str())
+}
+
pub(crate) fn build_compacted_history(
    initial_context: Vec<ResponseItem>,
    user_messages: &[String],
1 change: 1 addition & 0 deletions codex-rs/core/templates/compact/summary_prefix.md
@@ -0,0 +1 @@
+Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
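
Taken together, the compact.rs changes make compaction summaries self-identifying: run_compact_task_inner now prepends the summary_prefix.md text to the model's summary, and collect_user_messages skips any message that starts with that prefix, so an earlier summary is not re-collected as a user message when history is compacted again. A minimal round-trip sketch of that mechanism, using a hard-coded stand-in for the include_str! template rather than the real file:

// Sketch only: SUMMARY_PREFIX here is a stand-in for
// include_str!("../templates/compact/summary_prefix.md").
const SUMMARY_PREFIX: &str = "Another language model started to solve this problem...";

// Mirrors the new helper: a summary always begins with the prefix plus a newline.
fn is_summary_message(message: &str) -> bool {
    message.starts_with(format!("{SUMMARY_PREFIX}\n").as_str())
}

fn main() {
    // How run_compact_task_inner now builds the text it records.
    let summary_suffix = "Key findings so far: ...";
    let summary_text = format!("{SUMMARY_PREFIX}\n{summary_suffix}");

    assert!(is_summary_message(&summary_text));
    // Ordinary user messages are unaffected and still collected.
    assert!(!is_summary_message("Please fix the failing test."));
}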