Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ Legend: [ ] todo, [x] done, [~] in progress

## M1. Quality & Safety

- [ ] Placeholder normalization and validation
- [ ] Mapping between iOS (`%1$@`, `%d`) and Android (`%1$s`, `%d`)
- [ ] Detect placeholder mismatches across languages; fail in strict mode, warn otherwise
- [ ] Auto‑fix option for common cases (`%@` → `%s`, `%1$@` → `%1$s`)
- [ ] Tests across `.strings`, Android, `.xcstrings`
- [x] Placeholder normalization and validation
- [x] Mapping between iOS (`%1$@`, `%@`, `%ld`) and Android (`%1$s`, `%s`, `%d/%u`)
- [x] Detect placeholder mismatches across languages; strict vs non‑strict modes
- [x] Auto‑fix option for common cases (`normalize_placeholders_in_place`)
- [x] Tests across singular and plural entries; cross‑language normalization
- [ ] Plural rules engine
- [ ] CLDR‑driven required category sets per locale (few/many/etc.)
- [ ] Validation pass: flag missing categories per key+locale
Expand Down
6 changes: 4 additions & 2 deletions langcodec-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ mod debug;
mod formats;
mod merge;
mod path_glob;
mod stats;
mod transformers;
mod validation;
mod view;
mod stats;

use crate::convert::{ConvertOptions, run_unified_convert_command, try_custom_format_view};
use crate::debug::run_debug_command;
Expand Down Expand Up @@ -304,7 +304,9 @@ fn main() {
Commands::Stats { input, lang, json } => {
// Validate
let mut context = ValidationContext::new().with_input_file(input.clone());
if let Some(l) = &lang { context = context.with_language_code(l.clone()); }
if let Some(l) = &lang {
context = context.with_language_code(l.clone());
}
if let Err(e) = validate_context(&context) {
eprintln!("❌ Validation failed: {}", e);
std::process::exit(1);
Expand Down
12 changes: 9 additions & 3 deletions langcodec-cli/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ fn accumulate(lang_stats: &mut LangStats, status: &EntryStatus) {

pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: bool) {
let resources: Vec<_> = match lang_filter {
Some(lang) => codec.resources.iter().filter(|r| r.metadata.language == *lang).collect(),
Some(lang) => codec
.resources
.iter()
.filter(|r| r.metadata.language == *lang)
.collect(),
None => codec.resources.iter().collect(),
};

Expand Down Expand Up @@ -84,7 +88,10 @@ pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: boo
println!(" Total: {}", stats.total);
println!(" By status:");
for (k, v) in [
("translated", stats.by_status.get("translated").copied().unwrap_or(0)),
(
"translated",
stats.by_status.get("translated").copied().unwrap_or(0),
),
(
"needs_review",
stats.by_status.get("needs_review").copied().unwrap_or(0),
Expand All @@ -105,4 +112,3 @@ pub fn print_stats(codec: &Codec, lang_filter: &Option<String>, json_output: boo
println!(" Completion: {:.2}%", percent);
}
}

1 change: 0 additions & 1 deletion langcodec-cli/tests/stats_cli_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,3 @@ fn test_stats_json_on_android_strings() {
assert_eq!(by_status["do_not_translate"], 1);
assert_eq!(by_status["new"], 1);
}

308 changes: 308 additions & 0 deletions langcodec/src/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,182 @@ impl Codec {
.retain(|resource| !resource.entries.is_empty());
}

/// Validate placeholder consistency across languages for each key.
///
/// Rules (initial version):
/// - For each key, each language must have the same placeholder signature.
/// - For plural entries, all forms within a language must share the same signature.
/// - iOS vs Android differences like `%@`/`%1$@` vs `%s`/`%1$s` are normalized.
///
/// Example
/// ```rust
/// use langcodec::{Codec, types::{Entry, EntryStatus, Metadata, Resource, Translation}};
/// let mut codec = Codec::new();
/// let en = Resource{
/// metadata: Metadata{ language: "en".into(), domain: String::new(), custom: Default::default() },
/// entries: vec![Entry{ id: "greet".into(), value: Translation::Singular("Hello %1$@".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
/// };
/// let fr = Resource{
/// metadata: Metadata{ language: "fr".into(), domain: String::new(), custom: Default::default() },
/// entries: vec![Entry{ id: "greet".into(), value: Translation::Singular("Bonjour %1$s".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
/// };
/// codec.add_resource(en);
/// codec.add_resource(fr);
/// assert!(codec.validate_placeholders(true).is_ok());
/// ```
pub fn validate_placeholders(&self, strict: bool) -> Result<(), Error> {
use crate::placeholder::signature;
use crate::types::Translation;
use std::collections::HashMap;

// key -> lang -> Vec<signatures per form or single>
let mut map: HashMap<String, HashMap<String, Vec<Vec<String>>>> = HashMap::new();

for res in &self.resources {
for entry in &res.entries {
let sigs: Vec<Vec<String>> = match &entry.value {
Translation::Singular(v) => vec![signature(v)],
Translation::Plural(p) => p.forms.values().map(|v| signature(v)).collect(),
};
map.entry(entry.id.clone())
.or_default()
.entry(res.metadata.language.clone())
.or_default()
.push(sigs.into_iter().flatten().collect());
}
}

let mut problems = Vec::new();

for (key, langs) in map {
// Per-language: ensure all collected signatures for this entry are identical
let mut per_lang_sig: HashMap<String, Vec<String>> = HashMap::new();
for (lang, sig_lists) in langs {
if let Some(first) = sig_lists.first() {
if sig_lists.iter().any(|s| s != first) {
problems.push(format!(
"Key '{}' in '{}': inconsistent placeholders across forms: {:?}",
key, lang, sig_lists
));
}
per_lang_sig.insert(lang, first.clone());
}
}

// Across languages, pick one baseline and compare
if let Some((base_lang, base_sig)) = per_lang_sig.iter().next() {
for (lang, sig) in &per_lang_sig {
if sig != base_sig {
problems.push(format!(
"Key '{}' mismatch: {} {:?} vs {} {:?}",
key, base_lang, base_sig, lang, sig
));
}
}
}
}

if problems.is_empty() {
return Ok(());
}
if strict {
return Err(Error::validation_error(format!(
"Placeholder issues: {}",
problems.join(" | ")
)));
}
// Non-strict mode: treat as success
Ok(())
}

/// Collect placeholder issues without failing.
/// Returns a list of human-readable messages; empty if none.
///
/// Useful to warn in non-strict mode.
pub fn collect_placeholder_issues(&self) -> Vec<String> {
use crate::placeholder::signature;
use crate::types::Translation;
use std::collections::HashMap;

let mut map: HashMap<String, HashMap<String, Vec<Vec<String>>>> = HashMap::new();
for res in &self.resources {
for entry in &res.entries {
let sigs: Vec<Vec<String>> = match &entry.value {
Translation::Singular(v) => vec![signature(v)],
Translation::Plural(p) => p.forms.values().map(|v| signature(v)).collect(),
};
map.entry(entry.id.clone())
.or_default()
.entry(res.metadata.language.clone())
.or_default()
.push(sigs.into_iter().flatten().collect());
}
}

let mut problems = Vec::new();
for (key, langs) in map {
let mut per_lang_sig: HashMap<String, Vec<String>> = HashMap::new();
for (lang, sig_lists) in langs {
if let Some(first) = sig_lists.first() {
if sig_lists.iter().any(|s| s != first) {
problems.push(format!(
"Key '{}' in '{}': inconsistent placeholders across forms: {:?}",
key, lang, sig_lists
));
}
per_lang_sig.insert(lang, first.clone());
}
}
if let Some((base_lang, base_sig)) = per_lang_sig.iter().next() {
for (lang, sig) in &per_lang_sig {
if sig != base_sig {
problems.push(format!(
"Key '{}' mismatch: {} {:?} vs {} {:?}",
key, base_lang, base_sig, lang, sig
));
}
}
}
}
problems
}

/// Rewrite every translation string through
/// `crate::placeholder::normalize_placeholders`, mutating the codec in place.
///
/// Both singular values and every form of plural entries are replaced by
/// their normalized text. iOS patterns like `%@`, `%1$@`, `%ld` are converted
/// to canonical forms (%s, %1$s, %d/%u).
///
/// Example
/// ```rust
/// use langcodec::{Codec, types::{Entry, EntryStatus, Metadata, Resource, Translation}};
/// let mut codec = Codec::new();
/// codec.add_resource(Resource{
///     metadata: Metadata{ language: "en".into(), domain: String::new(), custom: Default::default() },
///     entries: vec![Entry{ id: "id".into(), value: Translation::Singular("Hello %@ and %1$@".into()), comment: None, status: EntryStatus::Translated, custom: Default::default() }]
/// });
/// codec.normalize_placeholders_in_place();
/// let v = match &codec.resources[0].entries[0].value { Translation::Singular(v) => v.clone(), _ => unreachable!() };
/// assert!(v.contains("%s") && v.contains("%1$s"));
/// ```
pub fn normalize_placeholders_in_place(&mut self) {
    use crate::placeholder::normalize_placeholders;
    use crate::types::Translation;

    let all_entries = self
        .resources
        .iter_mut()
        .flat_map(|resource| resource.entries.iter_mut());

    for entry in all_entries {
        match &mut entry.value {
            Translation::Singular(text) => *text = normalize_placeholders(text),
            Translation::Plural(plural) => plural
                .forms
                .values_mut()
                .for_each(|text| *text = normalize_placeholders(text)),
        }
    }
}

/// Merge resources with the same language by the given strategy.
///
/// This method groups resources by language and merges multiple resources
Expand Down Expand Up @@ -1584,4 +1760,136 @@ mod tests {
assert_eq!(merged.resources[0].metadata.language, "en");
assert_eq!(merged.resources[0].entries.len(), 2);
}

#[test]
fn test_validate_placeholders_across_languages() {
    // Builds a single-entry resource carrying the `greet` key for one language.
    let greeting = |language: &str, text: &str| Resource {
        metadata: Metadata {
            language: language.into(),
            domain: "d".into(),
            custom: HashMap::new(),
        },
        entries: vec![Entry {
            id: "greet".into(),
            value: Translation::Singular(text.into()),
            comment: None,
            status: EntryStatus::Translated,
            custom: HashMap::new(),
        }],
    };

    let mut codec = Codec::new();
    // iOS-style `%1$@` (English) and Android-style `%1$s` (French) are
    // expected to be equivalent once normalized.
    codec.add_resource(greeting("en", "Hello %1$@"));
    codec.add_resource(greeting("fr", "Bonjour %1$s"));

    let outcome = codec.validate_placeholders(true);
    assert!(outcome.is_ok());
}

#[test]
fn test_validate_placeholders_mismatch() {
    // Builds a single-entry resource carrying the `count` key for one language.
    let counter = |language: &str, text: &str| Resource {
        metadata: Metadata {
            language: language.into(),
            domain: "d".into(),
            custom: HashMap::new(),
        },
        entries: vec![Entry {
            id: "count".into(),
            value: Translation::Singular(text.into()),
            comment: None,
            status: EntryStatus::Translated,
            custom: HashMap::new(),
        }],
    };

    let mut codec = Codec::new();
    // `%d` in English versus `%s` in French: strict validation must reject it.
    codec.add_resource(counter("en", "%d files"));
    codec.add_resource(counter("fr", "%s fichiers"));

    let outcome = codec.validate_placeholders(true);
    assert!(outcome.is_err());
}

#[test]
fn test_collect_placeholder_issues_non_strict_ok() {
    // Builds a single-entry resource carrying the `count` key for one language.
    let counter = |language: &str, text: &str| Resource {
        metadata: Metadata {
            language: language.into(),
            domain: "d".into(),
            custom: HashMap::new(),
        },
        entries: vec![Entry {
            id: "count".into(),
            value: Translation::Singular(text.into()),
            comment: None,
            status: EntryStatus::Translated,
            custom: HashMap::new(),
        }],
    };

    let mut codec = Codec::new();
    codec.add_resource(counter("en", "%d files"));
    codec.add_resource(counter("fr", "%s fichiers"));

    // Non-strict validation succeeds even though the placeholders disagree...
    let lenient = codec.validate_placeholders(false);
    assert!(lenient.is_ok());

    // ...but the disagreement is still visible through the collector.
    let issues = codec.collect_placeholder_issues();
    assert!(!issues.is_empty());
}

#[test]
fn test_normalize_placeholders_in_place() {
    let english = Resource {
        metadata: Metadata {
            language: "en".into(),
            domain: "d".into(),
            custom: HashMap::new(),
        },
        entries: vec![Entry {
            id: "g".into(),
            value: Translation::Singular("Hello %@ and %1$@".into()),
            comment: None,
            status: EntryStatus::Translated,
            custom: HashMap::new(),
        }],
    };

    let mut codec = Codec::new();
    codec.add_resource(english);
    codec.normalize_placeholders_in_place();

    // A non-singular value falls back to an empty string, which makes the
    // assertions below fail.
    let normalized = if let Translation::Singular(text) = &codec.resources[0].entries[0].value {
        text.clone()
    } else {
        String::new()
    };
    assert!(normalized.contains("%s"));
    assert!(normalized.contains("%1$s"));
}
}
Loading
Loading