From a120d662e06c9df9033ba7878e1dfb2c603a48d4 Mon Sep 17 00:00:00 2001 From: Can Yu Date: Wed, 6 May 2026 22:28:53 +0800 Subject: [PATCH 1/3] feat(gateway): inbound attachment support for Google Chat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements image / text file / audio download from Google Chat via Media API + service account token, following the PR #731 base64 pattern. Changes: - GoogleChatMessage: parse attachment[] array (Attachment / AttachmentDataRef / DriveDataRef structs) - GoogleChatMediaRef enum: Image / File / Audio variants for typed dispatch - parse_attachments(): branches on contentType prefix, skips DRIVE_FILE source - download_googlechat_image(): resize → 1200px JPEG q75, max 10MB, GIF preserved - download_googlechat_file(): text extension whitelist (.txt/.md/.py/...), max 512KB - download_googlechat_audio(): forwarded as-is for core STT pipeline, max 25MB - media_url(): percent-encode resource_name as path segment - webhook handler: parses attachments, async-downloads via adapter token, populates Content.attachments - Empty-text events with attachments are now forwarded (previously dropped) - Tests: 11 new (parse, download success/skip/oversized, URL encoding) Refs: openabdev/openab#731 (Feishu pattern) Co-Authored-By: Claude Opus 4.6 --- docs/google-chat.md | 8 +- gateway/Cargo.lock | 2 +- gateway/src/adapters/googlechat.rs | 634 ++++++++++++++++++++++++++++- 3 files changed, 639 insertions(+), 5 deletions(-) diff --git a/docs/google-chat.md b/docs/google-chat.md index d51253c9..bcdc68c3 100644 --- a/docs/google-chat.md +++ b/docs/google-chat.md @@ -143,11 +143,17 @@ working_dir = "/home/agent" - Inline code, fenced code blocks: pass through unchanged - Tables and other unsupported syntax pass through as-is - **Streaming (edit_message)** — when OAB streaming is enabled, the bot edits its initial reply in-place as tokens arrive (typewriter effect) +- **Inbound attachments** — image, text file, and audio attachments are downloaded via Google Chat Media API and forwarded to the agent as base64 (PR #731 pattern): + - Images: resized to ≤1200px JPEG (q75); GIFs preserved. Max 10 MB. + - Text files: only known text extensions (`.txt`, `.md`, `.json`, `.py`, `.rs`, etc.). Max 512 KB. + - Audio: forwarded as-is for STT processing by core. Max 25 MB. + - Drive-sourced attachments are skipped (require separate Drive API integration). ### Not Supported - **Reactions** — Google Chat API does not support message reactions on behalf of bots -- **File/image attachments** — not yet implemented +- **Outbound attachments** — bot cannot send image/file attachments back to the user yet +- **Drive-linked attachments** — only `UPLOADED_CONTENT` source is handled; `DRIVE_FILE` source skipped ## Environment Variables (Gateway) diff --git a/gateway/Cargo.lock b/gateway/Cargo.lock index b0fa728b..8b5ba6be 100644 --- a/gateway/Cargo.lock +++ b/gateway/Cargo.lock @@ -1112,7 +1112,7 @@ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "openab-gateway" -version = "0.1.0" +version = "0.4.0" dependencies = [ "aes", "anyhow", diff --git a/gateway/src/adapters/googlechat.rs b/gateway/src/adapters/googlechat.rs index 68759e02..1ced9413 100644 --- a/gateway/src/adapters/googlechat.rs +++ b/gateway/src/adapters/googlechat.rs @@ -12,6 +12,12 @@ use tracing::{error, info, warn}; pub const GOOGLE_CHAT_API_BASE: &str = "https://chat.googleapis.com/v1"; const GOOGLE_CHAT_MESSAGE_LIMIT: usize = 4096; +const IMAGE_MAX_DIMENSION_PX: u32 = 1200; +const IMAGE_JPEG_QUALITY: u8 = 75; +const IMAGE_MAX_DOWNLOAD: u64 = 10 * 1024 * 1024; // 10 MB +const FILE_MAX_DOWNLOAD: u64 = 512 * 1024; // 512 KB +const AUDIO_MAX_DOWNLOAD: u64 = 25 * 1024 * 1024; // 25 MB + // --- Google Chat types (v2 envelope format) --- #[derive(Debug, Deserialize)] @@ -42,6 +48,51 @@ pub struct GoogleChatMessage { pub sender: Option, pub thread: Option, pub space: Option, + #[serde(default)] + pub attachment: Vec, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct GoogleChatAttachment { + pub name: Option, + pub content_name: Option, + pub content_type: Option, + pub source: Option, + pub attachment_data_ref: Option, + pub drive_data_ref: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct AttachmentDataRef { + pub resource_name: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct DriveDataRef { + pub drive_file_id: Option, +} + +/// Reference to media that needs async download after webhook parse. +#[derive(Debug, Clone)] +pub enum GoogleChatMediaRef { + Image { + resource_name: String, + content_name: String, + content_type: String, + }, + File { + resource_name: String, + content_name: String, + content_type: String, + }, + Audio { + resource_name: String, + content_name: String, + content_type: String, + }, } #[derive(Debug, Deserialize)] @@ -438,7 +489,11 @@ pub async fn webhook( .as_deref() .or(msg.text.as_deref()) .unwrap_or(""); - if text.trim().is_empty() { + + let media_refs = parse_attachments(&msg.attachment); + + // Drop event only if BOTH text and attachments are empty + if text.trim().is_empty() && media_refs.is_empty() { return empty_json_response(); } @@ -473,7 +528,73 @@ pub async fn webhook( .unwrap_or(&msg.name) .to_string(); - let gw_event = GatewayEvent::new( + // Async download attachments (uses adapter's token + client + api_base) + let mut downloaded_attachments: Vec = Vec::new(); + if !media_refs.is_empty() { + if let Some(ref adapter) = state.google_chat { + if let Some(token) = adapter.get_token().await { + for media_ref in &media_refs { + let attachment = match media_ref { + GoogleChatMediaRef::Image { + resource_name, + content_name, + .. + } => { + download_googlechat_image( + &adapter.client, + &token, + &adapter.api_base, + resource_name, + content_name, + ) + .await + } + GoogleChatMediaRef::File { + resource_name, + content_name, + .. + } => { + download_googlechat_file( + &adapter.client, + &token, + &adapter.api_base, + resource_name, + content_name, + ) + .await + } + GoogleChatMediaRef::Audio { + resource_name, + content_name, + content_type, + } => { + download_googlechat_audio( + &adapter.client, + &token, + &adapter.api_base, + resource_name, + content_name, + content_type, + ) + .await + } + }; + if let Some(att) = attachment { + downloaded_attachments.push(att); + } + } + } else { + warn!("googlechat: no token available for attachment download"); + } + } + } + + // If text is empty AND no attachments downloaded successfully, drop event + if text.trim().is_empty() && downloaded_attachments.is_empty() { + return empty_json_response(); + } + + let mut gw_event = GatewayEvent::new( "googlechat", ChannelInfo { id: space_name.clone(), @@ -490,9 +611,15 @@ pub async fn webhook( &message_id, vec![], ); + gw_event.content.attachments = downloaded_attachments; let json = serde_json::to_string(&gw_event).unwrap(); - info!(space = %space_name, sender = %sender_name, "googlechat → gateway"); + info!( + space = %space_name, + sender = %sender_name, + attachment_count = gw_event.content.attachments.len(), + "googlechat → gateway" + ); let _ = state.event_tx.send(json); empty_json_response() } @@ -903,6 +1030,254 @@ fn split_text(text: &str, limit: usize) -> Vec<&str> { chunks } +// --- Attachment parsing & download --- + +/// Whitelist of text-like file extensions for `download_googlechat_file`. +const TEXT_EXTS: &[&str] = &[ + "txt", "csv", "log", "md", "json", "jsonl", "yaml", "yml", "toml", "xml", + "rs", "py", "js", "ts", "jsx", "tsx", "go", "java", "c", "cpp", "h", "hpp", + "rb", "sh", "bash", "sql", "html", "css", "ini", "cfg", "conf", "env", +]; + +/// Parse Google Chat attachment array into media references for async download. +/// +/// Skips Drive-sourced attachments (different download API), and unknown +/// content types. Branches on `contentType` prefix to bucket into image / +/// audio / file. +fn parse_attachments(attachments: &[GoogleChatAttachment]) -> Vec { + let mut refs = Vec::new(); + for att in attachments { + // Only handle UPLOADED_CONTENT (Drive needs separate Drive API call) + if att.source.as_deref() != Some("UPLOADED_CONTENT") { + continue; + } + let resource_name = match att + .attachment_data_ref + .as_ref() + .and_then(|d| d.resource_name.clone()) + { + Some(rn) => rn, + None => continue, + }; + let content_type = att.content_type.clone().unwrap_or_default(); + let content_name = att.content_name.clone().unwrap_or_else(|| "file".into()); + + if content_type.starts_with("image/") { + refs.push(GoogleChatMediaRef::Image { + resource_name, + content_name, + content_type, + }); + } else if content_type.starts_with("audio/") { + refs.push(GoogleChatMediaRef::Audio { + resource_name, + content_name, + content_type, + }); + } else { + // Treat as file — download_googlechat_file checks extension whitelist + refs.push(GoogleChatMediaRef::File { + resource_name, + content_name, + content_type, + }); + } + } + refs +} + +/// Resize image so longest side ≤ 1200px, then encode as JPEG. +/// GIFs are passed through unchanged to preserve animation. +fn resize_and_compress(raw: &[u8]) -> Result<(Vec, String), image::ImageError> { + use image::ImageReader; + use std::io::Cursor; + + let reader = ImageReader::new(Cursor::new(raw)).with_guessed_format()?; + let format = reader.format(); + if format == Some(image::ImageFormat::Gif) { + return Ok((raw.to_vec(), "image/gif".to_string())); + } + let img = reader.decode()?; + let (w, h) = (img.width(), img.height()); + let img = if w > IMAGE_MAX_DIMENSION_PX || h > IMAGE_MAX_DIMENSION_PX { + let max_side = std::cmp::max(w, h); + let ratio = f64::from(IMAGE_MAX_DIMENSION_PX) / f64::from(max_side); + let new_w = (f64::from(w) * ratio) as u32; + let new_h = (f64::from(h) * ratio) as u32; + img.resize(new_w, new_h, image::imageops::FilterType::Lanczos3) + } else { + img + }; + let mut buf = Cursor::new(Vec::new()); + let encoder = image::codecs::jpeg::JpegEncoder::new_with_quality(&mut buf, IMAGE_JPEG_QUALITY); + img.write_with_encoder(encoder)?; + Ok((buf.into_inner(), "image/jpeg".to_string())) +} + +/// Build the Media API URL for a given resource_name. +/// resource_name is encoded as path segment. +fn media_url(api_base: &str, resource_name: &str) -> String { + // Percent-encode each character that needs escaping in a URL path segment. + let encoded: String = resource_name + .bytes() + .map(|b| match b { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + (b as char).to_string() + } + _ => format!("%{:02X}", b), + }) + .collect(); + format!("{}/media/{}?alt=media", api_base, encoded) +} + +/// Download an image attachment via Google Chat Media API → resize/compress → base64. +pub async fn download_googlechat_image( + client: &reqwest::Client, + token: &str, + api_base: &str, + resource_name: &str, + content_name: &str, +) -> Option { + let url = media_url(api_base, resource_name); + let resp = match client.get(&url).bearer_auth(token).send().await { + Ok(r) => r, + Err(e) => { + warn!(content_name, error = %e, "googlechat image download failed"); + return None; + } + }; + if !resp.status().is_success() { + warn!(content_name, status = %resp.status(), "googlechat image download failed"); + return None; + } + if let Some(cl) = resp.headers().get(reqwest::header::CONTENT_LENGTH) { + if let Ok(size) = cl.to_str().unwrap_or("0").parse::() { + if size > IMAGE_MAX_DOWNLOAD { + warn!(content_name, size, "googlechat image Content-Length exceeds 10MB limit"); + return None; + } + } + } + let bytes = resp.bytes().await.ok()?; + if bytes.len() as u64 > IMAGE_MAX_DOWNLOAD { + warn!(content_name, size = bytes.len(), "googlechat image exceeds 10MB limit"); + return None; + } + let (compressed, mime) = match resize_and_compress(&bytes) { + Ok(v) => v, + Err(e) => { + warn!(content_name, error = %e, "googlechat image resize failed"); + return None; + } + }; + use base64::Engine; + let data = base64::engine::general_purpose::STANDARD.encode(&compressed); + Some(crate::schema::Attachment { + attachment_type: "image".into(), + filename: content_name.to_string(), + mime_type: mime, + data, + size: compressed.len() as u64, + }) +} + +/// Download a text-like file via Google Chat Media API → base64. +/// Non-text extensions are skipped to avoid sending binary garbage to the model. +pub async fn download_googlechat_file( + client: &reqwest::Client, + token: &str, + api_base: &str, + resource_name: &str, + content_name: &str, +) -> Option { + let ext = content_name.rsplit('.').next().unwrap_or("").to_lowercase(); + if !TEXT_EXTS.contains(&ext.as_str()) { + tracing::debug!(content_name, "skipping non-text googlechat file attachment"); + return None; + } + let url = media_url(api_base, resource_name); + let resp = match client.get(&url).bearer_auth(token).send().await { + Ok(r) => r, + Err(e) => { + warn!(content_name, error = %e, "googlechat file download failed"); + return None; + } + }; + if !resp.status().is_success() { + warn!(content_name, status = %resp.status(), "googlechat file download failed"); + return None; + } + if let Some(cl) = resp.headers().get(reqwest::header::CONTENT_LENGTH) { + if let Ok(size) = cl.to_str().unwrap_or("0").parse::() { + if size > FILE_MAX_DOWNLOAD { + warn!(content_name, size, "googlechat file Content-Length exceeds 512KB limit"); + return None; + } + } + } + let bytes = resp.bytes().await.ok()?; + if bytes.len() as u64 > FILE_MAX_DOWNLOAD { + warn!(content_name, size = bytes.len(), "googlechat file exceeds 512KB limit"); + return None; + } + let text = String::from_utf8_lossy(&bytes); + use base64::Engine; + let data = base64::engine::general_purpose::STANDARD.encode(text.as_bytes()); + Some(crate::schema::Attachment { + attachment_type: "text_file".into(), + filename: content_name.to_string(), + mime_type: "text/plain".into(), + data, + size: bytes.len() as u64, + }) +} + +/// Download an audio attachment as-is (no resize/transcode) → base64. +/// Core's STT pipeline (when available) consumes this as `audio` attachment_type. +pub async fn download_googlechat_audio( + client: &reqwest::Client, + token: &str, + api_base: &str, + resource_name: &str, + content_name: &str, + content_type: &str, +) -> Option { + let url = media_url(api_base, resource_name); + let resp = match client.get(&url).bearer_auth(token).send().await { + Ok(r) => r, + Err(e) => { + warn!(content_name, error = %e, "googlechat audio download failed"); + return None; + } + }; + if !resp.status().is_success() { + warn!(content_name, status = %resp.status(), "googlechat audio download failed"); + return None; + } + if let Some(cl) = resp.headers().get(reqwest::header::CONTENT_LENGTH) { + if let Ok(size) = cl.to_str().unwrap_or("0").parse::() { + if size > AUDIO_MAX_DOWNLOAD { + warn!(content_name, size, "googlechat audio Content-Length exceeds 25MB limit"); + return None; + } + } + } + let bytes = resp.bytes().await.ok()?; + if bytes.len() as u64 > AUDIO_MAX_DOWNLOAD { + warn!(content_name, size = bytes.len(), "googlechat audio exceeds 25MB limit"); + return None; + } + use base64::Engine; + let data = base64::engine::general_purpose::STANDARD.encode(&bytes); + Some(crate::schema::Attachment { + attachment_type: "audio".into(), + filename: content_name.to_string(), + mime_type: content_type.to_string(), + data, + size: bytes.len() as u64, + }) +} + #[cfg(test)] mod tests { use super::*; @@ -1370,6 +1745,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: "hello".into(), }, command: None, @@ -1412,6 +1788,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: "hello".into(), }, command: None, @@ -1458,6 +1835,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: "".into(), }, command: None, @@ -1501,6 +1879,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: long_text, }, command: None, @@ -1534,6 +1913,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: "hello".into(), }, command: None, @@ -1578,6 +1958,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: "updated text".into(), }, command: Some("edit_message".into()), @@ -1619,6 +2000,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: long_text, }, command: None, @@ -1675,6 +2057,7 @@ mod tests { }, content: Content { content_type: "text".into(), + attachments: Vec::new(), text: long_text, }, command: None, @@ -1692,4 +2075,249 @@ mod tests { let err = resp.error.expect("partial failure should set error"); assert!(err.contains("500")); } + + // --- Attachment parsing tests --- + + fn make_attachment( + source: &str, + content_type: &str, + content_name: &str, + resource_name: Option<&str>, + ) -> GoogleChatAttachment { + GoogleChatAttachment { + name: Some("spaces/SP/messages/MSG/attachments/ATT".into()), + content_name: Some(content_name.into()), + content_type: Some(content_type.into()), + source: Some(source.into()), + attachment_data_ref: resource_name.map(|rn| AttachmentDataRef { + resource_name: Some(rn.into()), + }), + drive_data_ref: None, + } + } + + #[test] + fn parse_attachments_image() { + let atts = vec![make_attachment( + "UPLOADED_CONTENT", + "image/png", + "photo.png", + Some("AATT_resource"), + )]; + let refs = parse_attachments(&atts); + assert_eq!(refs.len(), 1); + match &refs[0] { + GoogleChatMediaRef::Image { + resource_name, + content_name, + content_type, + } => { + assert_eq!(resource_name, "AATT_resource"); + assert_eq!(content_name, "photo.png"); + assert_eq!(content_type, "image/png"); + } + other => panic!("expected Image, got {:?}", other), + } + } + + #[test] + fn parse_attachments_audio() { + let atts = vec![make_attachment( + "UPLOADED_CONTENT", + "audio/mp4", + "voice.m4a", + Some("AATT"), + )]; + let refs = parse_attachments(&atts); + assert!(matches!(refs[0], GoogleChatMediaRef::Audio { .. })); + } + + #[test] + fn parse_attachments_file() { + let atts = vec![make_attachment( + "UPLOADED_CONTENT", + "text/plain", + "notes.txt", + Some("AATT"), + )]; + let refs = parse_attachments(&atts); + assert!(matches!(refs[0], GoogleChatMediaRef::File { .. })); + } + + #[test] + fn parse_attachments_skips_drive() { + let atts = vec![GoogleChatAttachment { + name: Some("spaces/SP/messages/MSG/attachments/ATT".into()), + content_name: Some("doc".into()), + content_type: Some("application/vnd.google-apps.document".into()), + source: Some("DRIVE_FILE".into()), + attachment_data_ref: None, + drive_data_ref: Some(DriveDataRef { + drive_file_id: Some("drive_id_123".into()), + }), + }]; + assert_eq!(parse_attachments(&atts).len(), 0); + } + + #[test] + fn parse_attachments_skips_missing_resource_name() { + let atts = vec![make_attachment( + "UPLOADED_CONTENT", + "image/png", + "photo.png", + None, + )]; + assert_eq!(parse_attachments(&atts).len(), 0); + } + + #[test] + fn media_url_encodes_resource_name() { + let url = media_url("https://chat.googleapis.com/v1", "AATT/some+resource=name"); + assert_eq!( + url, + "https://chat.googleapis.com/v1/media/AATT%2Fsome%2Bresource%3Dname?alt=media" + ); + } + + #[tokio::test] + async fn download_googlechat_image_resizes_and_returns_attachment() { + use wiremock::{Mock, MockServer, ResponseTemplate}; + use wiremock::matchers::{method, path_regex}; + + // Generate a small valid PNG + let img = image::RgbImage::from_pixel(10, 10, image::Rgb([255, 0, 0])); + let mut buf = std::io::Cursor::new(Vec::new()); + image::DynamicImage::ImageRgb8(img) + .write_to(&mut buf, image::ImageFormat::Png) + .unwrap(); + let png_bytes = buf.into_inner(); + + let mock_server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex("/media/.*")) + .respond_with( + ResponseTemplate::new(200) + .set_body_bytes(png_bytes) + .insert_header("content-type", "image/png"), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::new(); + let result = download_googlechat_image( + &client, + "fake-token", + &mock_server.uri(), + "AATT_resource", + "photo.png", + ) + .await; + let att = result.expect("expected successful download"); + assert_eq!(att.attachment_type, "image"); + assert_eq!(att.filename, "photo.png"); + assert_eq!(att.mime_type, "image/jpeg"); // resized PNG → JPEG + assert!(!att.data.is_empty()); + assert!(att.size > 0); + } + + #[tokio::test] + async fn download_googlechat_file_rejects_non_text_extension() { + let client = reqwest::Client::new(); + let result = download_googlechat_file( + &client, + "fake-token", + "https://unused", // not called for non-text + "AATT", + "binary.exe", + ) + .await; + assert!(result.is_none(), "non-text extensions must be skipped"); + } + + #[tokio::test] + async fn download_googlechat_file_text_extension_succeeds() { + use wiremock::{Mock, MockServer, ResponseTemplate}; + use wiremock::matchers::{method, path_regex}; + + let mock_server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex("/media/.*")) + .respond_with( + ResponseTemplate::new(200).set_body_bytes(b"hello world".to_vec()), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::new(); + let result = download_googlechat_file( + &client, + "fake-token", + &mock_server.uri(), + "AATT", + "notes.txt", + ) + .await; + let att = result.expect("expected successful download"); + assert_eq!(att.attachment_type, "text_file"); + assert_eq!(att.filename, "notes.txt"); + assert_eq!(att.mime_type, "text/plain"); + } + + #[tokio::test] + async fn download_googlechat_audio_returns_attachment() { + use wiremock::{Mock, MockServer, ResponseTemplate}; + use wiremock::matchers::{method, path_regex}; + + let mock_server = MockServer::start().await; + let audio_bytes = vec![0u8; 1024]; + Mock::given(method("GET")) + .and(path_regex("/media/.*")) + .respond_with(ResponseTemplate::new(200).set_body_bytes(audio_bytes.clone())) + .mount(&mock_server) + .await; + + let client = reqwest::Client::new(); + let result = download_googlechat_audio( + &client, + "fake-token", + &mock_server.uri(), + "AATT", + "voice.m4a", + "audio/mp4", + ) + .await; + let att = result.expect("expected successful download"); + assert_eq!(att.attachment_type, "audio"); + assert_eq!(att.filename, "voice.m4a"); + assert_eq!(att.mime_type, "audio/mp4"); + assert_eq!(att.size, 1024); + } + + #[tokio::test] + async fn download_googlechat_image_rejects_oversized_content_length() { + use wiremock::{Mock, MockServer, ResponseTemplate}; + use wiremock::matchers::{method, path_regex}; + + let mock_server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_regex("/media/.*")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-length", "20000000") // 20 MB > 10 MB limit + .set_body_bytes(vec![0u8; 100]), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::new(); + let result = download_googlechat_image( + &client, + "fake-token", + &mock_server.uri(), + "AATT", + "huge.png", + ) + .await; + assert!(result.is_none(), "oversized image must be rejected"); + } } From d0343319bd62ca24c80d51c7e0f2bae1209904fb Mon Sep 17 00:00:00 2001 From: Can Yu Date: Wed, 6 May 2026 23:07:13 +0800 Subject: [PATCH 2/3] feat(core): STT for Custom Gateway audio attachments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends src/gateway.rs attachment handling to transcribe audio attachments via the existing STT pipeline (previously only Discord/Slack adapters went through download_and_transcribe; Custom Gateway adapters got no audio path even though stt::transcribe was available). When a gateway adapter (Feishu, Google Chat, etc.) sends an Attachment with attachment_type = "audio", core now: 1. Decodes base64 → audio bytes 2. Calls stt::transcribe with the configured SttConfig 3. Wraps the transcript as a ContentBlock::Text: "[Voice message transcript]: ..." The audio branch is gated on stt_config.enabled — if STT is disabled in config, audio attachments fall through unchanged (same as before). Threads stt_config through GatewayParams and run_gateway_adapter. This closes the audio attachment gap left by the (now-closed) PR #726 without re-introducing the HTTP MediaStore proxy approach. Pairs with the Google Chat adapter audio download (separate PR) — once both land, Google Chat voice/audio attachments work end-to-end. Co-Authored-By: Claude Opus 4.6 --- src/gateway.rs | 23 +++++++++++++++++++++++ src/main.rs | 1 + 2 files changed, 24 insertions(+) diff --git a/src/gateway.rs b/src/gateway.rs index 8aed6aab..ddaa0345 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -487,6 +487,7 @@ pub struct GatewayParams { pub allow_all_users: bool, pub allowed_users: Vec, pub streaming: bool, + pub stt_config: crate::config::SttConfig, } pub async fn run_gateway_adapter( @@ -505,6 +506,7 @@ pub async fn run_gateway_adapter( let allow_all_users = params.allow_all_users; let allowed_users = params.allowed_users; let streaming = params.streaming; + let stt_config = params.stt_config; let connect_url = match ¶ms.token { Some(token) => { @@ -671,6 +673,27 @@ pub async fn run_gateway_adapter( }); } } + "audio" if stt_config.enabled => { + use base64::Engine; + if let Ok(bytes) = base64::engine::general_purpose::STANDARD.decode(&att.data) { + match crate::stt::transcribe( + &crate::media::HTTP_CLIENT, + &stt_config, + bytes, + att.filename.clone(), + &att.mime_type, + ).await { + Some(transcript) => { + extra_blocks.push(ContentBlock::Text { + text: format!("[Voice message transcript]: {transcript}"), + }); + } + None => { + tracing::warn!(filename = %att.filename, "gateway audio STT failed"); + } + } + } + } _ => {} } } diff --git a/src/main.rs b/src/main.rs index 04a0937f..188762f3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -269,6 +269,7 @@ async fn main() -> anyhow::Result<()> { allow_all_users: config::resolve_allow_all(gw_cfg.allow_all_users, &gw_cfg.allowed_users), allowed_users: gw_cfg.allowed_users, streaming: gw_cfg.streaming, + stt_config: cfg.stt.clone(), }; let gw_router = router.clone(); Some(tokio::spawn(async move { From 3b95c0adbf20d7977469796af723dc1bcdba0b12 Mon Sep 17 00:00:00 2001 From: Can Yu Date: Thu, 7 May 2026 01:11:13 +0800 Subject: [PATCH 3/3] fix(gateway): address googlechat attachment review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses canyugs/openab#4 must-fix items: #1+#2 Webhook timeout safety: - Spawn background tokio task for attachment downloads so the webhook returns 200 within Google Chat's 30s deadline regardless of how long downloads take. - Add 30s per-request timeout to all Media API GET calls — a single hung connection can no longer stall the download task indefinitely. - Refactor event emission into send_googlechat_event helper to share between sync (no-attachment) and async (background download) paths. #4 Text file caps (matches Discord/Slack): - TEXT_FILE_COUNT_CAP = 5: skip text files past the 5th with a warning. - TEXT_TOTAL_CAP = 1 MB: skip text files that would push the running aggregate past 1 MB with a warning. - Image and audio attachments are not capped (same as Discord/Slack). #6 STT silent failure: - When stt::transcribe returns None, push a fallback ContentBlock::Text ("[Voice message — transcription failed for ...]") so the agent knows a voice message was attempted and can ask the user to re-send. Previously the failure was silent and confusing. Skipped from issue #4: #3 (streaming download), #5 (cross-adapter refactor — adapters stay independent per design), #7-#10 (cosmetic). Co-Authored-By: Claude Opus 4.6 --- gateway/src/adapters/googlechat.rs | 155 +++++++++++++++++++++++------ src/gateway.rs | 6 ++ 2 files changed, 131 insertions(+), 30 deletions(-) diff --git a/gateway/src/adapters/googlechat.rs b/gateway/src/adapters/googlechat.rs index 1ced9413..910f129f 100644 --- a/gateway/src/adapters/googlechat.rs +++ b/gateway/src/adapters/googlechat.rs @@ -17,6 +17,13 @@ const IMAGE_JPEG_QUALITY: u8 = 75; const IMAGE_MAX_DOWNLOAD: u64 = 10 * 1024 * 1024; // 10 MB const FILE_MAX_DOWNLOAD: u64 = 512 * 1024; // 512 KB const AUDIO_MAX_DOWNLOAD: u64 = 25 * 1024 * 1024; // 25 MB +/// Per-request timeout for Google Chat Media API downloads. Prevents a hung +/// connection from blocking the spawned download task indefinitely. +const MEDIA_REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); +/// Cap on text file attachments per message (matches Discord/Slack). +const TEXT_FILE_COUNT_CAP: usize = 5; +/// Cap on aggregate text file bytes per message (matches Discord/Slack 1 MB). +const TEXT_TOTAL_CAP: u64 = 1024 * 1024; // --- Google Chat types (v2 envelope format) --- @@ -488,7 +495,8 @@ pub async fn webhook( .argument_text .as_deref() .or(msg.text.as_deref()) - .unwrap_or(""); + .unwrap_or("") + .to_string(); let media_refs = parse_attachments(&msg.attachment); @@ -528,9 +536,30 @@ pub async fn webhook( .unwrap_or(&msg.name) .to_string(); - // Async download attachments (uses adapter's token + client + api_base) - let mut downloaded_attachments: Vec = Vec::new(); - if !media_refs.is_empty() { + // No attachments → emit event synchronously and respond 200 + if media_refs.is_empty() { + send_googlechat_event( + &state, + &space_name, + space_type, + thread_id, + &sender_id, + &sender_name, + &display_name, + &text, + &message_id, + Vec::new(), + ); + return empty_json_response(); + } + + // Has attachments — spawn background task so the webhook returns 200 within + // Google Chat's 30 s deadline regardless of how long downloads take. + let state = state.clone(); + tokio::spawn(async move { + let mut downloaded: Vec = Vec::new(); + let mut text_file_count: usize = 0; + let mut text_file_bytes: u64 = 0; if let Some(ref adapter) = state.google_chat { if let Some(token) = adapter.get_token().await { for media_ref in &media_refs { @@ -554,14 +583,41 @@ pub async fn webhook( content_name, .. } => { - download_googlechat_file( - &adapter.client, - &token, - &adapter.api_base, - resource_name, - content_name, - ) - .await + // Match Discord/Slack: cap text file count and aggregate bytes. + if text_file_count >= TEXT_FILE_COUNT_CAP { + warn!( + content_name = %content_name, + cap = TEXT_FILE_COUNT_CAP, + "googlechat text file count cap reached, skipping" + ); + None + } else { + let result = download_googlechat_file( + &adapter.client, + &token, + &adapter.api_base, + resource_name, + content_name, + ) + .await; + if let Some(ref att) = result { + if text_file_bytes + att.size > TEXT_TOTAL_CAP { + warn!( + content_name = %content_name, + total = text_file_bytes + att.size, + cap = TEXT_TOTAL_CAP, + "googlechat text file aggregate exceeds cap, skipping" + ); + None + } else { + text_file_count += 1; + text_file_bytes += att.size; + result + } + } else { + None + } + } } GoogleChatMediaRef::Audio { resource_name, @@ -580,48 +636,87 @@ pub async fn webhook( } }; if let Some(att) = attachment { - downloaded_attachments.push(att); + downloaded.push(att); } } } else { warn!("googlechat: no token available for attachment download"); } } - } - // If text is empty AND no attachments downloaded successfully, drop event - if text.trim().is_empty() && downloaded_attachments.is_empty() { - return empty_json_response(); - } + // If text is empty AND every attachment failed to download, drop the event. + if text.trim().is_empty() && downloaded.is_empty() { + warn!( + space = %space_name, + "googlechat: empty text + all attachments failed, dropping event" + ); + return; + } + + send_googlechat_event( + &state, + &space_name, + space_type, + thread_id, + &sender_id, + &sender_name, + &display_name, + &text, + &message_id, + downloaded, + ); + }); + + empty_json_response() +} +#[allow(clippy::too_many_arguments)] +fn send_googlechat_event( + state: &Arc, + space_name: &str, + space_type: String, + thread_id: Option, + sender_id: &str, + sender_name: &str, + display_name: &str, + text: &str, + message_id: &str, + attachments: Vec, +) { let mut gw_event = GatewayEvent::new( "googlechat", ChannelInfo { - id: space_name.clone(), + id: space_name.to_string(), channel_type: space_type, thread_id, }, SenderInfo { - id: sender_id, - name: sender_name.clone(), - display_name, + id: sender_id.to_string(), + name: sender_name.to_string(), + display_name: display_name.to_string(), is_bot: false, }, text, - &message_id, + message_id, vec![], ); - gw_event.content.attachments = downloaded_attachments; + gw_event.content.attachments = attachments; - let json = serde_json::to_string(&gw_event).unwrap(); + let attachment_count = gw_event.content.attachments.len(); + let json = match serde_json::to_string(&gw_event) { + Ok(j) => j, + Err(e) => { + error!(error = %e, "googlechat: failed to serialize GatewayEvent"); + return; + } + }; info!( space = %space_name, sender = %sender_name, - attachment_count = gw_event.content.attachments.len(), + attachment_count, "googlechat → gateway" ); let _ = state.event_tx.send(json); - empty_json_response() } fn empty_json_response() -> axum::response::Response { @@ -1139,7 +1234,7 @@ pub async fn download_googlechat_image( content_name: &str, ) -> Option { let url = media_url(api_base, resource_name); - let resp = match client.get(&url).bearer_auth(token).send().await { + let resp = match client.get(&url).bearer_auth(token).timeout(MEDIA_REQUEST_TIMEOUT).send().await { Ok(r) => r, Err(e) => { warn!(content_name, error = %e, "googlechat image download failed"); @@ -1196,7 +1291,7 @@ pub async fn download_googlechat_file( return None; } let url = media_url(api_base, resource_name); - let resp = match client.get(&url).bearer_auth(token).send().await { + let resp = match client.get(&url).bearer_auth(token).timeout(MEDIA_REQUEST_TIMEOUT).send().await { Ok(r) => r, Err(e) => { warn!(content_name, error = %e, "googlechat file download failed"); @@ -1243,7 +1338,7 @@ pub async fn download_googlechat_audio( content_type: &str, ) -> Option { let url = media_url(api_base, resource_name); - let resp = match client.get(&url).bearer_auth(token).send().await { + let resp = match client.get(&url).bearer_auth(token).timeout(MEDIA_REQUEST_TIMEOUT).send().await { Ok(r) => r, Err(e) => { warn!(content_name, error = %e, "googlechat audio download failed"); diff --git a/src/gateway.rs b/src/gateway.rs index ddaa0345..04287d6e 100644 --- a/src/gateway.rs +++ b/src/gateway.rs @@ -690,6 +690,12 @@ pub async fn run_gateway_adapter( } None => { tracing::warn!(filename = %att.filename, "gateway audio STT failed"); + extra_blocks.push(ContentBlock::Text { + text: format!( + "[Voice message — transcription failed for {}]", + att.filename + ), + }); } } }