From 93aa7fa059004aa160351ea06b4eec15606084bb Mon Sep 17 00:00:00 2001 From: JARVIS-coding-Agent Date: Fri, 10 Apr 2026 04:54:07 +0000 Subject: [PATCH 1/3] feat: add markdown table conversion pipeline with pulldown-cmark - Introduce pulldown-cmark as markdown parser for accurate table detection - Add TableMode config (code/bullets/off) via [markdown] section in config.toml - Convert detected tables before sending final content to Discord - Design as reusable pipeline for future multi-channel support Closes #178 --- Cargo.toml | 1 + config.toml.example | 4 + src/config.rs | 15 +++ src/discord.rs | 7 +- src/main.rs | 2 + src/markdown.rs | 300 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 src/markdown.rs diff --git a/Cargo.toml b/Cargo.toml index edfdf87..ef5a266 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ uuid = { version = "1", features = ["v4"] } regex = "1" anyhow = "1" rand = "0.8" +pulldown-cmark = { version = "0.13", default-features = false } diff --git a/config.toml.example b/config.toml.example index c4227dc..7e3f76c 100644 --- a/config.toml.example +++ b/config.toml.example @@ -48,3 +48,7 @@ stall_soft_ms = 10000 stall_hard_ms = 30000 done_hold_ms = 1500 error_hold_ms = 2500 + +[markdown] +# How to render markdown tables: "code" (fenced code block), "bullets", or "off" +tables = "code" diff --git a/src/config.rs b/src/config.rs index 719feaf..3b565fc 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,3 +1,4 @@ +use crate::markdown::TableMode; use regex::Regex; use serde::Deserialize; use std::collections::HashMap; @@ -11,6 +12,8 @@ pub struct Config { pub pool: PoolConfig, #[serde(default)] pub reactions: ReactionsConfig, + #[serde(default)] + pub markdown: MarkdownConfig, } #[derive(Debug, Deserialize)] @@ -20,6 +23,18 @@ pub struct DiscordConfig { pub allowed_channels: Vec, } +#[derive(Debug, Clone, Deserialize)] +pub struct MarkdownConfig { + #[serde(default)] + pub tables: TableMode, +} + +impl Default for MarkdownConfig { + fn default() -> Self { + Self { tables: TableMode::default() } + } +} + #[derive(Debug, Deserialize)] pub struct AgentConfig { pub command: String, diff --git a/src/discord.rs b/src/discord.rs index da52c69..0a9be01 100644 --- a/src/discord.rs +++ b/src/discord.rs @@ -1,6 +1,7 @@ use crate::acp::{classify_notification, AcpEvent, SessionPool}; -use crate::config::ReactionsConfig; +use crate::config::{MarkdownConfig, ReactionsConfig}; use crate::format; +use crate::markdown; use crate::reactions::StatusReactionController; use serenity::async_trait; use serenity::model::channel::Message; @@ -16,6 +17,7 @@ pub struct Handler { pub pool: Arc, pub allowed_channels: HashSet, pub reactions_config: ReactionsConfig, + pub markdown_config: MarkdownConfig, } #[async_trait] @@ -143,6 +145,7 @@ impl EventHandler for Handler { thread_channel, thinking_msg.id, reactions.clone(), + self.markdown_config.tables, ) .await; @@ -187,6 +190,7 @@ async fn stream_prompt( channel: ChannelId, msg_id: MessageId, reactions: Arc, + table_mode: markdown::TableMode, ) -> anyhow::Result<()> { let prompt = prompt.to_string(); let reactions = reactions.clone(); @@ -296,6 +300,7 @@ async fn stream_prompt( // Final edit let final_content = compose_display(&tool_lines, &text_buf); + let final_content = markdown::convert_tables(&final_content, table_mode); let final_content = if final_content.is_empty() { "_(no response)_".to_string() } else { diff --git a/src/main.rs b/src/main.rs index a216b66..a69ff91 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ mod acp; mod config; mod discord; mod format; +mod markdown; mod reactions; use serenity::prelude::*; @@ -47,6 +48,7 @@ async fn main() -> anyhow::Result<()> { pool: pool.clone(), allowed_channels, reactions_config: cfg.reactions, + markdown_config: cfg.markdown, }; let intents = GatewayIntents::GUILD_MESSAGES diff --git a/src/markdown.rs b/src/markdown.rs new file mode 100644 index 0000000..8328d05 --- /dev/null +++ b/src/markdown.rs @@ -0,0 +1,300 @@ +use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; +use serde::Deserialize; +use std::fmt; + +/// How to render markdown tables for a given channel. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum TableMode { + /// Wrap the table in a fenced code block (default). + Code, + /// Convert each row into bullet points. + Bullets, + /// Pass through unchanged. + Off, +} + +impl Default for TableMode { + fn default() -> Self { + Self::Code + } +} + +impl fmt::Display for TableMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Code => write!(f, "code"), + Self::Bullets => write!(f, "bullets"), + Self::Off => write!(f, "off"), + } + } +} + +// ── IR types ──────────────────────────────────────────────────────── + +/// A parsed table: header row + data rows, each cell is plain text. +struct Table { + headers: Vec, + rows: Vec>, +} + +/// Segment of the document — either verbatim text or a parsed table. +enum Segment { + Text(String), + Table(Table), +} + +// ── Public API ────────────────────────────────────────────────────── + +/// Parse markdown, detect tables via pulldown-cmark, and render them +/// according to `mode`. Non-table content passes through unchanged. +pub fn convert_tables(markdown: &str, mode: TableMode) -> String { + if mode == TableMode::Off || markdown.is_empty() { + return markdown.to_string(); + } + + let segments = parse_segments(markdown); + + let mut out = String::with_capacity(markdown.len()); + for seg in segments { + match seg { + Segment::Text(t) => out.push_str(&t), + Segment::Table(table) => match mode { + TableMode::Code => render_table_code(&table, &mut out), + TableMode::Bullets => render_table_bullets(&table, &mut out), + TableMode::Off => unreachable!(), + }, + } + } + out +} + +// ── Parser ────────────────────────────────────────────────────────── + +/// Walk the markdown source with pulldown-cmark and split it into +/// text segments and parsed Table segments. +fn parse_segments(markdown: &str) -> Vec { + let mut opts = Options::empty(); + opts.insert(Options::ENABLE_TABLES); + + let mut segments: Vec = Vec::new(); + let mut in_table = false; + let mut in_head = false; + let mut headers: Vec = Vec::new(); + let mut rows: Vec> = Vec::new(); + let mut current_row: Vec = Vec::new(); + let mut cell_buf = String::new(); + let mut last_table_end: usize = 0; + + // We need byte offsets to grab non-table text verbatim. + let parser_with_offsets = Parser::new_ext(markdown, opts).into_offset_iter(); + + for (event, range) in parser_with_offsets { + match event { + Event::Start(Tag::Table(_)) => { + // Flush text before this table + let before = &markdown[last_table_end..range.start]; + if !before.is_empty() { + push_text(&mut segments, before); + } + in_table = true; + headers.clear(); + rows.clear(); + } + Event::End(TagEnd::Table) => { + let table = Table { + headers: std::mem::take(&mut headers), + rows: std::mem::take(&mut rows), + }; + segments.push(Segment::Table(table)); + in_table = false; + last_table_end = range.end; + } + Event::Start(Tag::TableHead) => { + in_head = true; + current_row.clear(); + } + Event::End(TagEnd::TableHead) => { + headers = std::mem::take(&mut current_row); + in_head = false; + } + Event::Start(Tag::TableRow) => { + current_row.clear(); + } + Event::End(TagEnd::TableRow) => { + if !in_head { + rows.push(std::mem::take(&mut current_row)); + } + } + Event::Start(Tag::TableCell) => { + cell_buf.clear(); + } + Event::End(TagEnd::TableCell) => { + current_row.push(cell_buf.trim().to_string()); + cell_buf.clear(); + } + Event::Text(t) if in_table => { + cell_buf.push_str(&t); + } + Event::Code(t) if in_table => { + cell_buf.push('`'); + cell_buf.push_str(&t); + cell_buf.push('`'); + } + _ => {} + } + } + + // Remaining text after last table + if last_table_end < markdown.len() { + let tail = &markdown[last_table_end..]; + if !tail.is_empty() { + push_text(&mut segments, tail); + } + } + + segments +} + +fn push_text(segments: &mut Vec, text: &str) { + if let Some(Segment::Text(ref mut prev)) = segments.last_mut() { + prev.push_str(text); + } else { + segments.push(Segment::Text(text.to_string())); + } +} + +// ── Renderers ─────────────────────────────────────────────────────── + +/// Render table as a fenced code block with aligned columns. +fn render_table_code(table: &Table, out: &mut String) { + let col_count = table + .headers + .len() + .max(table.rows.iter().map(|r| r.len()).max().unwrap_or(0)); + if col_count == 0 { + return; + } + + // Compute column widths + let mut widths = vec![0usize; col_count]; + for (i, h) in table.headers.iter().enumerate() { + widths[i] = widths[i].max(h.len()); + } + for row in &table.rows { + for (i, cell) in row.iter().enumerate() { + if i < col_count { + widths[i] = widths[i].max(cell.len()); + } + } + } + // Minimum width 3 for the divider + for w in &mut widths { + *w = (*w).max(3); + } + + out.push_str("```\n"); + + // Header row + write_row(out, &table.headers, &widths, col_count); + // Divider + out.push('|'); + for w in &widths { + out.push(' '); + for _ in 0..*w { + out.push('-'); + } + out.push_str(" |"); + } + out.push('\n'); + // Data rows + for row in &table.rows { + write_row(out, row, &widths, col_count); + } + + out.push_str("```\n"); +} + +fn write_row(out: &mut String, cells: &[String], widths: &[usize], col_count: usize) { + out.push('|'); + for i in 0..col_count { + out.push(' '); + let cell = cells.get(i).map(|s| s.as_str()).unwrap_or(""); + out.push_str(cell); + let pad = widths[i] - cell.len(); + for _ in 0..pad { + out.push(' '); + } + out.push_str(" |"); + } + out.push('\n'); +} + +/// Render table as bullet points: `• header: value` per cell. +fn render_table_bullets(table: &Table, out: &mut String) { + for row in &table.rows { + for (i, cell) in row.iter().enumerate() { + if cell.is_empty() { + continue; + } + out.push_str("• "); + if let Some(h) = table.headers.get(i) { + if !h.is_empty() { + out.push_str(h); + out.push_str(": "); + } + } + out.push_str(cell); + out.push('\n'); + } + out.push('\n'); + } +} + +// ── Tests ─────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + const TABLE_MD: &str = "\ +Some text before. + +| Name | Age | +|-------|-----| +| Alice | 30 | +| Bob | 25 | + +Some text after. +"; + + #[test] + fn off_mode_passes_through() { + let result = convert_tables(TABLE_MD, TableMode::Off); + assert_eq!(result, TABLE_MD); + } + + #[test] + fn code_mode_wraps_in_codeblock() { + let result = convert_tables(TABLE_MD, TableMode::Code); + assert!(result.contains("```\n")); + assert!(result.contains("| Alice")); + assert!(result.contains("Some text before.")); + assert!(result.contains("Some text after.")); + } + + #[test] + fn bullets_mode_converts_to_bullets() { + let result = convert_tables(TABLE_MD, TableMode::Bullets); + assert!(result.contains("• Name: Alice")); + assert!(result.contains("• Age: 30")); + assert!(!result.contains("```")); + } + + #[test] + fn no_table_passes_through() { + let plain = "Hello world\nNo tables here."; + let result = convert_tables(plain, TableMode::Code); + assert_eq!(result, plain); + } +} From 37ffc637af84e869bb5eb81ad2f568f1a484f103 Mon Sep 17 00:00:00 2001 From: JARVIS-coding-Agent Date: Fri, 10 Apr 2026 04:59:43 +0000 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20address=20PR=20review=20=E2=80=94=20?= =?UTF-8?q?unicode=20width,=20inline=20markup,=20trailing=20newline?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use unicode-width crate for column width calculation (fixes CJK/emoji alignment) - Use saturating_sub for padding to prevent underflow - Handle inline markup inside table cells (bold, italic, strikethrough, link) - Convert SoftBreak/HardBreak to space inside cells - Fix trailing blank line after last row in bullets mode --- Cargo.toml | 1 + src/markdown.rs | 35 +++++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ef5a266..c6c1193 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,4 @@ regex = "1" anyhow = "1" rand = "0.8" pulldown-cmark = { version = "0.13", default-features = false } +unicode-width = "0.2" diff --git a/src/markdown.rs b/src/markdown.rs index 8328d05..d8fa721 100644 --- a/src/markdown.rs +++ b/src/markdown.rs @@ -1,6 +1,7 @@ use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; use serde::Deserialize; use std::fmt; +use unicode_width::UnicodeWidthStr; /// How to render markdown tables for a given channel. #[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)] @@ -141,6 +142,24 @@ fn parse_segments(markdown: &str) -> Vec { cell_buf.push_str(&t); cell_buf.push('`'); } + // Inline markup inside cells: collect text, ignore tags + Event::SoftBreak if in_table => { + cell_buf.push(' '); + } + Event::HardBreak if in_table => { + cell_buf.push(' '); + } + // Start/End of inline tags (bold, italic, link, etc.) — skip the + // tag markers but keep processing their child text events above. + Event::Start(Tag::Emphasis) + | Event::Start(Tag::Strong) + | Event::Start(Tag::Strikethrough) + | Event::Start(Tag::Link { .. }) + | Event::End(TagEnd::Emphasis) + | Event::End(TagEnd::Strong) + | Event::End(TagEnd::Strikethrough) + | Event::End(TagEnd::Link) + if in_table => {} _ => {} } } @@ -176,15 +195,15 @@ fn render_table_code(table: &Table, out: &mut String) { return; } - // Compute column widths + // Compute column widths (using display width for CJK/emoji) let mut widths = vec![0usize; col_count]; for (i, h) in table.headers.iter().enumerate() { - widths[i] = widths[i].max(h.len()); + widths[i] = widths[i].max(UnicodeWidthStr::width(h.as_str())); } for row in &table.rows { for (i, cell) in row.iter().enumerate() { if i < col_count { - widths[i] = widths[i].max(cell.len()); + widths[i] = widths[i].max(UnicodeWidthStr::width(cell.as_str())); } } } @@ -221,7 +240,8 @@ fn write_row(out: &mut String, cells: &[String], widths: &[usize], col_count: us out.push(' '); let cell = cells.get(i).map(|s| s.as_str()).unwrap_or(""); out.push_str(cell); - let pad = widths[i] - cell.len(); + let display_width = UnicodeWidthStr::width(cell); + let pad = widths[i].saturating_sub(display_width); for _ in 0..pad { out.push(' '); } @@ -232,7 +252,7 @@ fn write_row(out: &mut String, cells: &[String], widths: &[usize], col_count: us /// Render table as bullet points: `• header: value` per cell. fn render_table_bullets(table: &Table, out: &mut String) { - for row in &table.rows { + for (row_idx, row) in table.rows.iter().enumerate() { for (i, cell) in row.iter().enumerate() { if cell.is_empty() { continue; @@ -247,7 +267,10 @@ fn render_table_bullets(table: &Table, out: &mut String) { out.push_str(cell); out.push('\n'); } - out.push('\n'); + // Blank line between rows, but not after the last one + if row_idx + 1 < table.rows.len() { + out.push('\n'); + } } } From 91d77bb5ed57e8ea5b7f0d032935548c55991551 Mon Sep 17 00:00:00 2001 From: OpenAB Agent Date: Mon, 13 Apr 2026 07:04:43 +0000 Subject: [PATCH 3/3] fix: strip backticks in code mode; split_message is code-fence-aware - parse_segments now takes a mode parameter: in Code mode, Event::Code cells omit the backtick wrapping since the table is already inside a fenced code block and backticks would render as literal characters. Bullets mode keeps backticks as they are valid inline markdown. - split_message now tracks whether the cursor is inside a fenced code block (``` ... ```). When a chunk boundary falls mid-block, the current chunk is closed with ``` and the next chunk is reopened with ```, so each Discord message renders the code block correctly. - Tests added for both fixes. --- src/format.rs | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ src/markdown.rs | 37 +++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 4 deletions(-) diff --git a/src/format.rs b/src/format.rs index 841cf55..77efe4d 100644 --- a/src/format.rs +++ b/src/format.rs @@ -1,5 +1,9 @@ /// Split text into chunks at line boundaries, each <= limit Unicode characters (UTF-8 safe). /// Discord's message limit counts Unicode characters, not bytes. +/// +/// Fenced code blocks (``` ... ```) are handled specially: if a split falls inside a +/// code block, the current chunk is closed with ``` and the next chunk is reopened with +/// ```, so each chunk renders correctly in Discord. pub fn split_message(text: &str, limit: usize) -> Vec { if text.chars().count() <= limit { return vec![text.to_string()]; @@ -8,19 +12,38 @@ pub fn split_message(text: &str, limit: usize) -> Vec { let mut chunks = Vec::new(); let mut current = String::new(); let mut current_len: usize = 0; + let mut in_code_fence = false; for line in text.split('\n') { let line_chars = line.chars().count(); + let is_fence_marker = line.starts_with("```"); + // +1 for the newline if !current.is_empty() && current_len + line_chars + 1 > limit { + if in_code_fence && !is_fence_marker { + // Close the open code fence so this chunk renders correctly. + current.push_str("\n```"); + } chunks.push(current); current = String::new(); current_len = 0; + if in_code_fence && !is_fence_marker { + // Reopen the code fence in the new chunk. + // The newline separator below will join it to the first content line. + current.push_str("```"); + current_len = 3; + } } + if !current.is_empty() { current.push('\n'); current_len += 1; } + + if is_fence_marker { + in_code_fence = !in_code_fence; + } + // If a single line exceeds limit, hard-split on char boundaries if line_chars > limit { for ch in line.chars() { @@ -43,6 +66,53 @@ pub fn split_message(text: &str, limit: usize) -> Vec { chunks } +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn split_under_limit_returns_single_chunk() { + let text = "hello world"; + assert_eq!(split_message(text, 2000), vec![text.to_string()]); + } + + #[test] + fn split_code_fence_closed_and_reopened_across_chunks() { + // Build a fenced code block whose lines exceed the limit when combined. + // Each data line is 100 chars; 21 lines = 2121 chars inside the fence, + // forcing a split mid-block. + let row = format!("| {} |\n", "x".repeat(95)); // 100 chars per row + let mut text = String::from("```\n"); + for _ in 0..21 { + text.push_str(&row); + } + text.push_str("```\n"); + + let chunks = split_message(&text, 2000); + assert!(chunks.len() >= 2, "expected multiple chunks"); + for (i, chunk) in chunks.iter().enumerate() { + let fence_count = chunk.lines().filter(|l| l.starts_with("```")).count(); + assert_eq!( + fence_count % 2, + 0, + "chunk {i} has unmatched code fences:\n{chunk}" + ); + } + } + + #[test] + fn split_does_not_corrupt_content_outside_fence() { + let mut text = String::new(); + for i in 0..30 { + text.push_str(&format!("Line number {i} with some padding text here.\n")); + } + let original_lines: Vec<&str> = text.lines().collect(); + let chunks = split_message(&text, 200); + let rejoined: Vec<&str> = chunks.iter().flat_map(|c| c.lines()).collect(); + assert_eq!(original_lines, rejoined); + } +} + /// Truncate a string to at most `limit` Unicode characters. /// Discord's message limit counts Unicode characters, not bytes. pub fn truncate_chars(s: &str, limit: usize) -> &str { diff --git a/src/markdown.rs b/src/markdown.rs index d8fa721..29d9713 100644 --- a/src/markdown.rs +++ b/src/markdown.rs @@ -54,7 +54,7 @@ pub fn convert_tables(markdown: &str, mode: TableMode) -> String { return markdown.to_string(); } - let segments = parse_segments(markdown); + let segments = parse_segments(markdown, mode); let mut out = String::with_capacity(markdown.len()); for seg in segments { @@ -74,7 +74,7 @@ pub fn convert_tables(markdown: &str, mode: TableMode) -> String { /// Walk the markdown source with pulldown-cmark and split it into /// text segments and parsed Table segments. -fn parse_segments(markdown: &str) -> Vec { +fn parse_segments(markdown: &str, mode: TableMode) -> Vec { let mut opts = Options::empty(); opts.insert(Options::ENABLE_TABLES); @@ -138,9 +138,15 @@ fn parse_segments(markdown: &str) -> Vec { cell_buf.push_str(&t); } Event::Code(t) if in_table => { - cell_buf.push('`'); + // In Code mode the table is already inside a fenced code block, + // so backticks would render as literal characters. Strip them. + if mode != TableMode::Code { + cell_buf.push('`'); + } cell_buf.push_str(&t); - cell_buf.push('`'); + if mode != TableMode::Code { + cell_buf.push('`'); + } } // Inline markup inside cells: collect text, ignore tags Event::SoftBreak if in_table => { @@ -320,4 +326,27 @@ Some text after. let result = convert_tables(plain, TableMode::Code); assert_eq!(result, plain); } + + #[test] + fn code_mode_strips_backticks_from_code_cells() { + let md = "| col |\n|-----|\n| `value` |\n"; + let result = convert_tables(md, TableMode::Code); + // The table is inside a ``` block — backtick wrapping must be stripped. + assert!(result.contains("value"), "cell content should be present"); + // Only the fence markers themselves should contain backticks. + let inner = result + .trim_start_matches("```\n") + .trim_end_matches("```\n"); + assert!( + !inner.contains('`'), + "no backticks should appear inside the code fence: {result:?}" + ); + } + + #[test] + fn bullets_mode_keeps_backticks_in_code_cells() { + let md = "| col |\n|-----|\n| `value` |\n"; + let result = convert_tables(md, TableMode::Bullets); + assert!(result.contains("`value`"), "backticks should be kept in bullets mode"); + } }