Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion codex-rs/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions codex-rs/core/src/turn_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::sync::Arc;
use std::sync::Mutex;
use std::sync::RwLock;

use codex_utils_string::to_ascii_json_string;
use serde::Serialize;
use serde_json::Value;
use tokio::task::JoinHandle;
Expand Down Expand Up @@ -69,7 +70,7 @@ pub(crate) struct TurnMetadataBag {

impl TurnMetadataBag {
    /// Render the bag as an ASCII-safe JSON string suitable for an HTTP
    /// header, or `None` when serialization fails.
    fn to_header_value(&self) -> Option<String> {
        match to_ascii_json_string(self) {
            Ok(serialized) => Some(serialized),
            Err(_) => None,
        }
    }
}

Expand All @@ -84,7 +85,7 @@ fn merge_responsesapi_client_metadata(
.entry(key.clone())
.or_insert_with(|| Value::String(value.clone()));
}
serde_json::to_string(&metadata).ok()
to_ascii_json_string(&metadata).ok()
}

fn build_turn_metadata_bag(
Expand Down
16 changes: 15 additions & 1 deletion codex-rs/core/src/turn_metadata_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use core_test_support::PathBufExt;
use core_test_support::PathExt;
use pretty_assertions::assert_eq;
use serde_json::Value;
use std::collections::HashMap;
use tempfile::TempDir;
Expand All @@ -12,7 +13,7 @@ use tokio::process::Command;
#[tokio::test]
async fn build_turn_metadata_header_includes_has_changes_for_clean_repo() {
let temp_dir = TempDir::new().expect("temp dir");
let repo_path = temp_dir.path().join("repo").abs();
let repo_path = temp_dir.path().join("repo-東京").abs();
std::fs::create_dir_all(&repo_path).expect("create repo");

Command::new("git")
Expand Down Expand Up @@ -51,7 +52,16 @@ async fn build_turn_metadata_header_includes_has_changes_for_clean_repo() {
let header = build_turn_metadata_header(&repo_path, Some("none"))
.await
.expect("header");
assert!(header.is_ascii());
assert!(!header.contains("東京"));
let parsed: Value = serde_json::from_str(&header).expect("valid json");
let expected_repo_path = repo_path.to_string_lossy().into_owned();
let actual_repo_path = parsed
.get("workspaces")
.and_then(Value::as_object)
.and_then(|workspaces| workspaces.keys().next())
.expect("workspace path");
assert_eq!(actual_repo_path, &expected_repo_path);
let workspace = parsed
.get("workspaces")
.and_then(Value::as_object)
Expand Down Expand Up @@ -132,14 +142,18 @@ fn turn_metadata_state_merges_client_metadata_without_replacing_reserved_fields(
);
state.set_responsesapi_client_metadata(HashMap::from([
("fiber_run_id".to_string(), "fiber-123".to_string()),
("origin".to_string(), "東京".to_string()),
("session_id".to_string(), "client-supplied".to_string()),
("thread_source".to_string(), "client-supplied".to_string()),
]));

let header = state.current_header_value().expect("header");
assert!(header.is_ascii());
assert!(!header.contains("東京"));
let json: Value = serde_json::from_str(&header).expect("json");

assert_eq!(json["fiber_run_id"].as_str(), Some("fiber-123"));
assert_eq!(json["origin"].as_str(), Some("東京"));
assert_eq!(json["session_id"].as_str(), Some("session-a"));
assert_eq!(json["thread_source"].as_str(), Some("user"));
assert_eq!(json["turn_id"].as_str(), Some("turn-a"));
Expand Down
2 changes: 2 additions & 0 deletions codex-rs/utils/string/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ workspace = true

[dependencies]
regex-lite = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }

[dev-dependencies]
pretty_assertions = { workspace = true }
122 changes: 122 additions & 0 deletions codex-rs/utils/string/src/json.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//! JSON serialization helpers for output that must remain parseable as JSON
//! while staying safe for ASCII-only transports.

use std::io;

use serde::Serialize;

struct AsciiJsonFormatter;

impl serde_json::ser::Formatter for AsciiJsonFormatter {
// serde_json has no ensure_ascii flag; this formatter keeps its serializer
// in charge and only escapes non-ASCII string fragments.
fn write_string_fragment<W>(&mut self, writer: &mut W, fragment: &str) -> io::Result<()>
Comment thread
etraut-openai marked this conversation as resolved.
where
W: ?Sized + io::Write,
{
let mut start = 0;
for (index, ch) in fragment.char_indices() {
if ch.is_ascii() {
continue;
}

if start < index {
writer.write_all(&fragment.as_bytes()[start..index])?;
}

let mut utf16 = [0; 2];
for code_unit in ch.encode_utf16(&mut utf16) {
write!(writer, "\\u{code_unit:04x}")?;
}
start = index + ch.len_utf8();
}

if start < fragment.len() {
writer.write_all(&fragment.as_bytes()[start..])?;
}

Ok(())
}
}

/// Serialize JSON while escaping non-ASCII string content as `\uXXXX`.
///
/// This is useful when JSON needs to remain parseable as JSON but must be
/// carried through ASCII-safe transports such as HTTP headers.
pub fn to_ascii_json_string<T>(value: &T) -> serde_json::Result<String>
where
T: Serialize + ?Sized,
{
let mut bytes = Vec::new();
let mut serializer = serde_json::Serializer::with_formatter(&mut bytes, AsciiJsonFormatter);
value.serialize(&mut serializer)?;
String::from_utf8(bytes)
.map_err(|err| serde_json::Error::io(io::Error::new(io::ErrorKind::InvalidData, err)))
}

#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use pretty_assertions::assert_eq;
    use serde::Serialize;
    use serde::ser::SerializeStruct;
    use serde_json::Value;
    use serde_json::json;

    use super::to_ascii_json_string;

    #[test]
    fn to_ascii_json_string_escapes_non_ascii_strings() {
        // Hand-rolled Serialize impls pin the field order so the expected
        // output string below is deterministic.
        struct Payload;

        impl Serialize for Payload {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                let workspaces = BTreeMap::from([("/tmp/東京", Workspace)]);
                let mut state = serializer.serialize_struct("TestPayload", 1)?;
                state.serialize_field("workspaces", &workspaces)?;
                state.end()
            }
        }

        struct Workspace;

        impl Serialize for Workspace {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                let mut state = serializer.serialize_struct("TestWorkspace", 2)?;
                state.serialize_field("label", "Agentlarım")?;
                state.serialize_field("emoji", "🚀")?;
                state.end()
            }
        }

        let serialized = to_ascii_json_string(&Payload).expect("serialize ascii json");

        // Exact escaped form: BMP chars as single \uXXXX, the emoji as a
        // surrogate pair.
        assert_eq!(
            serialized,
            r#"{"workspaces":{"/tmp/\u6771\u4eac":{"label":"Agentlar\u0131m","emoji":"\ud83d\ude80"}}}"#
        );
        assert!(serialized.is_ascii());
        assert!(!serialized.contains("東京"));
        assert!(!serialized.contains("Agentlarım"));
        assert!(!serialized.contains("🚀"));

        // Escaping must not change the parsed value: the output still
        // round-trips to the original structure.
        let parsed: Value = serde_json::from_str(&serialized).expect("serialized json");
        let expected_value = json!({
            "workspaces": {
                "/tmp/東京": {
                    "label": "Agentlarım",
                    "emoji": "🚀"
                }
            }
        });
        assert_eq!(parsed, expected_value);
    }
}
2 changes: 2 additions & 0 deletions codex-rs/utils/string/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod json;
mod truncate;

pub use json::to_ascii_json_string;
pub use truncate::approx_bytes_for_tokens;
pub use truncate::approx_token_count;
pub use truncate::approx_tokens_from_byte_count;
Expand Down
Loading