From 080e4024780d2a8f730a81179783b672029c50c6 Mon Sep 17 00:00:00 2001 From: Imamuzzaki Abu Salam Date: Tue, 2 Jun 2026 11:46:00 +0700 Subject: [PATCH 1/3] fix(domain): collapse system messages into a single one (#2894) `Context::set_system_messages` previously inserted each entry in `content` as a separate `ContextMessage::system(...)`, producing N system messages in the resulting chat. Chat templates such as Qwen3.5/3.6 (used by llama.cpp and vLLM) only allow a single system message at `messages[0]` and raise `raise_exception('System message must be at the beginning.')` for any additional one, breaking those providers entirely. Join all entries into a single system message (separated by `\n\n`) and insert it once at position 0. Empty payloads are filtered out and an all-empty `content` vector leaves the context untouched. Fixes #2894, also resolves the duplicate #2586 (qwen3.5-27b llama.cpp). Co-Authored-By: ForgeCode --- crates/forge_domain/src/context.rs | 104 ++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/crates/forge_domain/src/context.rs b/crates/forge_domain/src/context.rs index c2f0f30fde..55dba3a742 100644 --- a/crates/forge_domain/src/context.rs +++ b/crates/forge_domain/src/context.rs @@ -524,24 +524,37 @@ impl Context { self } - /// Updates the set system message + /// Replaces any existing system messages with the provided content. + /// + /// All entries in `content` are joined into a single system message + /// (separated by `\n\n`) and inserted at position 0 of the message list. + /// This is required for compatibility with chat templates — such as + /// Qwen3.5/3.6 (llama.cpp, vLLM) — that only allow a single system + /// message at `messages[0]` and raise `raise_exception('System message + /// must be at the beginning.')` for any additional system message. pub fn set_system_messages>(mut self, content: Vec) -> Self { - if self.messages.is_empty() { - for message in content { - self.messages - .push(ContextMessage::system(message.into()).into()); - } - self - } else { - // drop all the system messages; - self.messages.retain(|m| !m.has_role(Role::System)); - // add the system message at the beginning. - for message in content.into_iter().rev() { - self.messages - .insert(0, ContextMessage::system(message.into()).into()); - } - self + // Drop every existing system message regardless of the new payload. + self.messages.retain(|m| !m.has_role(Role::System)); + + // Combine all provided system message entries into a single block. + let combined: String = content + .into_iter() + .map(Into::into) + .filter(|s: &String| !s.is_empty()) + .collect::>() + .join("\n\n"); + + if combined.is_empty() { + return self; } + + // Insert the single, combined system message at the beginning of the + // message list, so chat templates that require `messages[0]` to be the + // (only) system message continue to work. + self.messages + .insert(0, ContextMessage::system(combined).into()); + + self } /// Converts the context to textual format @@ -849,6 +862,65 @@ mod tests { ); } + /// Regression test for #2894: chat templates such as Qwen3.5/3.6 (used by + /// llama.cpp and vLLM) only allow a single system message at + /// `messages[0]` and raise `raise_exception('System message must be at the + /// beginning.')` for any additional one. Therefore `set_system_messages` + /// must always produce exactly one system message in the context, no + /// matter how many entries are passed in. + #[test] + fn test_set_system_messages_collapses_into_single_message() { + // Fixture: multiple distinct system message blocks (mirroring what + // `system_prompt.rs` passes: the static agent prompt + the + // non-static agent template). + let request = Context::default().set_system_messages(vec![ + "Static agent prompt", + "Non-static agent template", + ]); + + let expected = ContextMessage::system("Static agent prompt\n\nNon-static agent template") + .into(); + + // The first message must be the single, combined system message. + assert_eq!(request.messages[0], expected); + + // And there must be exactly one system message in the whole context. + let system_count = request + .messages + .iter() + .filter(|m| m.has_role(Role::System)) + .count(); + assert_eq!(system_count, 1); + } + + /// Regression test for #2894: when called on a context that already + /// contains a pre-existing system message (e.g. an injected one from + /// earlier in the pipeline), the call must replace ALL of them with a + /// single combined message — never append a second one. + #[test] + fn test_set_system_messages_replaces_existing_single_system_message() { + let model = ModelId::new("test-model"); + let request = Context::default() + .add_message(ContextMessage::system("Pre-existing system message")) + .add_message(ContextMessage::user("Do something", Some(model))) + .set_system_messages(vec!["First", "Second"]); + + // The pre-existing system message must have been replaced (not + // retained or duplicated). + let system_count = request + .messages + .iter() + .filter(|m| m.has_role(Role::System)) + .count(); + assert_eq!(system_count, 1); + + // The single remaining system message must be the combined payload. + assert_eq!( + request.messages[0], + ContextMessage::system("First\n\nSecond").into(), + ); + } + #[test] fn test_estimate_token_count() { // Create a context with some messages From b28c5ff8ee5c328d62a0a2c242f3a3a70974fb19 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 2 Jun 2026 04:49:15 +0000 Subject: [PATCH 2/3] [autofix.ci] apply automated fixes --- crates/forge_domain/src/context.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/crates/forge_domain/src/context.rs b/crates/forge_domain/src/context.rs index 55dba3a742..97ced467de 100644 --- a/crates/forge_domain/src/context.rs +++ b/crates/forge_domain/src/context.rs @@ -873,13 +873,11 @@ mod tests { // Fixture: multiple distinct system message blocks (mirroring what // `system_prompt.rs` passes: the static agent prompt + the // non-static agent template). - let request = Context::default().set_system_messages(vec![ - "Static agent prompt", - "Non-static agent template", - ]); + let request = Context::default() + .set_system_messages(vec!["Static agent prompt", "Non-static agent template"]); - let expected = ContextMessage::system("Static agent prompt\n\nNon-static agent template") - .into(); + let expected = + ContextMessage::system("Static agent prompt\n\nNon-static agent template").into(); // The first message must be the single, combined system message. assert_eq!(request.messages[0], expected); From ddf21d33d5bf74b801f4b13b152d429f5b29181e Mon Sep 17 00:00:00 2001 From: Imamuzzaki Abu Salam Date: Tue, 2 Jun 2026 11:57:39 +0700 Subject: [PATCH 3/3] fix(domain): address review feedback on #3436 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Applied suggestions from the Copilot review on PR #3436: 1. Doc comments no longer wrap the Jinja exception `raise_exception('System message must be at the beginning.')` across a line break inside a single inline code span — both occurrences now use a fenced `text` code block so rustdoc/CommonMark render as intended. 2. The semantic of an empty `content` payload is now explicitly part of the public contract of `set_system_messages`: it clears every existing system message and inserts no replacement. This matches the pre-#2894 behavior on the non-empty branch, but it was previously undocumented. Added two regression tests (`..._with_empty_payload_clears_existing` and `..._with_only_empty_strings_clears_existing`) to lock it in. 3. The combined system message is now built in a single `String` allocation (push_str + manual separator insertion) instead of the previous `collect::>().join(...)` approach, which materialised an intermediate `Vec` for no benefit. Co-Authored-By: ForgeCode --- crates/forge_domain/src/context.rs | 100 +++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 19 deletions(-) diff --git a/crates/forge_domain/src/context.rs b/crates/forge_domain/src/context.rs index 97ced467de..49142d0585 100644 --- a/crates/forge_domain/src/context.rs +++ b/crates/forge_domain/src/context.rs @@ -526,31 +526,48 @@ impl Context { /// Replaces any existing system messages with the provided content. /// - /// All entries in `content` are joined into a single system message - /// (separated by `\n\n`) and inserted at position 0 of the message list. - /// This is required for compatibility with chat templates — such as - /// Qwen3.5/3.6 (llama.cpp, vLLM) — that only allow a single system - /// message at `messages[0]` and raise `raise_exception('System message - /// must be at the beginning.')` for any additional system message. + /// All non-empty entries in `content` are joined into a single system + /// message (separated by `\n\n`) and inserted at position 0 of the + /// message list. This is required for compatibility with chat templates + /// — such as Qwen3.5/3.6 served by llama.cpp or vLLM — that only allow + /// a single system message at `messages[0]` and raise the Jinja + /// exception shown below for any additional system message. + /// + /// If `content` is empty (or every entry is an empty string), every + /// existing system message is removed and no replacement is inserted. + /// Callers that want to preserve an existing system message should not + /// invoke this method with an empty payload. + /// + /// ```text + /// raise_exception('System message must be at the beginning.') + /// ``` pub fn set_system_messages>(mut self, content: Vec) -> Self { // Drop every existing system message regardless of the new payload. self.messages.retain(|m| !m.has_role(Role::System)); - // Combine all provided system message entries into a single block. - let combined: String = content + // Combine all provided system message entries into a single block, + // skipping empty entries and inserting the `\n\n` separator only + // between non-empty entries — all in a single allocation. + let mut combined = String::new(); + let mut iter = content .into_iter() .map(Into::into) - .filter(|s: &String| !s.is_empty()) - .collect::>() - .join("\n\n"); + .filter(|s: &String| !s.is_empty()); + if let Some(first) = iter.next() { + combined.push_str(&first); + for s in iter { + combined.push_str("\n\n"); + combined.push_str(&s); + } + } if combined.is_empty() { return self; } // Insert the single, combined system message at the beginning of the - // message list, so chat templates that require `messages[0]` to be the - // (only) system message continue to work. + // message list, so chat templates that require `messages[0]` to be + // the (only) system message continue to work. self.messages .insert(0, ContextMessage::system(combined).into()); @@ -862,12 +879,16 @@ mod tests { ); } - /// Regression test for #2894: chat templates such as Qwen3.5/3.6 (used by - /// llama.cpp and vLLM) only allow a single system message at - /// `messages[0]` and raise `raise_exception('System message must be at the - /// beginning.')` for any additional one. Therefore `set_system_messages` - /// must always produce exactly one system message in the context, no - /// matter how many entries are passed in. + /// Regression test for #2894: chat templates such as Qwen3.5/3.6 + /// (served by llama.cpp or vLLM) only allow a single system message at + /// `messages[0]` and raise the Jinja exception shown below for any + /// additional one. Therefore `set_system_messages` must always produce + /// exactly one system message in the context, no matter how many entries + /// are passed in. + /// + /// ```text + /// raise_exception('System message must be at the beginning.') + /// ``` #[test] fn test_set_system_messages_collapses_into_single_message() { // Fixture: multiple distinct system message blocks (mirroring what @@ -919,6 +940,47 @@ mod tests { ); } + /// An empty `content` (or a vector of only empty strings) removes every + /// existing system message and inserts no replacement. This matches the + /// pre-#2894 behavior on the non-empty branch and is now part of the + /// public contract of `set_system_messages`. + #[test] + fn test_set_system_messages_with_empty_payload_clears_existing() { + let model = ModelId::new("test-model"); + let request = Context::default() + .add_message(ContextMessage::system("Pre-existing system message")) + .add_message(ContextMessage::user("Do something", Some(model))) + .set_system_messages(Vec::::new()); + + // No system message should remain after the call. + let system_count = request + .messages + .iter() + .filter(|m| m.has_role(Role::System)) + .count(); + assert_eq!(system_count, 0); + + // The user message must be preserved. + assert_eq!(request.messages.len(), 1); + } + + /// When every entry in `content` is an empty string, the result must be + /// the same as passing an empty `content`: no system message is + /// inserted. + #[test] + fn test_set_system_messages_with_only_empty_strings_clears_existing() { + let request = Context::default() + .add_message(ContextMessage::system("Pre-existing system message")) + .set_system_messages(vec!["", "", ""]); + + let system_count = request + .messages + .iter() + .filter(|m| m.has_role(Role::System)) + .count(); + assert_eq!(system_count, 0); + } + #[test] fn test_estimate_token_count() { // Create a context with some messages