diff --git a/README.md b/README.md index 307d81b..bbfa286 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # xml-sec +[![crates.io](https://img.shields.io/crates/v/xml-sec.svg)](https://crates.io/crates/xml-sec) +[![docs.rs](https://docs.rs/xml-sec/badge.svg)](https://docs.rs/xml-sec) +[![CI](https://github.com/structured-world/xml-sec/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/structured-world/xml-sec/actions/workflows/ci.yml) +[![MSRV](https://img.shields.io/badge/rustc-1.92%2B-blue.svg)](https://www.rust-lang.org) +[![License](https://img.shields.io/crates/l/xml-sec.svg)](https://github.com/structured-world/xml-sec/blob/main/LICENSE) + Pure Rust XML Security library. Drop-in replacement for libxmlsec1. **No C dependencies. No cmake. No system libraries. Just `cargo add xml-sec`.** diff --git a/src/xmldsig/parse.rs b/src/xmldsig/parse.rs index 368f9dc..2bdab2d 100644 --- a/src/xmldsig/parse.rs +++ b/src/xmldsig/parse.rs @@ -31,6 +31,14 @@ const MAX_DER_ENCODED_KEY_VALUE_LEN: usize = 8192; const MAX_DER_ENCODED_KEY_VALUE_TEXT_LEN: usize = 65_536; const MAX_DER_ENCODED_KEY_VALUE_BASE64_LEN: usize = MAX_DER_ENCODED_KEY_VALUE_LEN.div_ceil(3) * 4; const MAX_KEY_NAME_TEXT_LEN: usize = 4096; +const MAX_X509_BASE64_TEXT_LEN: usize = 262_144; +const MAX_X509_BASE64_NORMALIZED_LEN: usize = MAX_X509_BASE64_TEXT_LEN; +const MAX_X509_DECODED_BINARY_LEN: usize = MAX_X509_BASE64_NORMALIZED_LEN.div_ceil(4) * 3; +const MAX_X509_SUBJECT_NAME_TEXT_LEN: usize = 16_384; +const MAX_X509_ISSUER_NAME_TEXT_LEN: usize = 16_384; +const MAX_X509_SERIAL_NUMBER_TEXT_LEN: usize = 4096; +const MAX_X509_DATA_ENTRY_COUNT: usize = 64; +const MAX_X509_DATA_TOTAL_BINARY_LEN: usize = 1_048_576; /// Signature algorithms supported for signing and verification. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -156,22 +164,22 @@ pub enum KeyValueInfo { }, } -/// Parsed `` children (dispatch-only in P2-001). +/// Parsed `` children. #[derive(Debug, Default, Clone, PartialEq, Eq)] #[non_exhaustive] pub struct X509DataInfo { - /// Number of `` children. - pub certificate_count: usize, - /// Number of `` children. - pub subject_name_count: usize, - /// Number of `` children. - pub issuer_serial_count: usize, - /// Number of `` children. - pub ski_count: usize, - /// Number of `` children. - pub crl_count: usize, - /// Number of `` children. - pub digest_count: usize, + /// DER-encoded certificates from ``. + pub certificates: Vec>, + /// Text values from ``. + pub subject_names: Vec, + /// `(IssuerName, SerialNumber)` tuples from ``. + pub issuer_serials: Vec<(String, String)>, + /// Raw bytes from ``. + pub skis: Vec>, + /// DER-encoded CRLs from ``. + pub crls: Vec>, + /// `(Algorithm URI, digest bytes)` tuples from `dsig11:X509Digest`. + pub digests: Vec<(String, Vec)>, } /// Errors during XMLDSig element parsing. @@ -488,25 +496,52 @@ fn parse_x509_data_dispatch(node: Node) -> Result { ensure_no_non_whitespace_text(node, "X509Data")?; let mut info = X509DataInfo::default(); + let mut total_binary_len = 0usize; for child in element_children(node) { match (child.tag_name().namespace(), child.tag_name().name()) { (Some(XMLDSIG_NS), "X509Certificate") => { - info.certificate_count += 1; + ensure_no_element_children(child, "X509Certificate")?; + ensure_x509_data_entry_budget(&info)?; + let cert = decode_x509_base64(child, "X509Certificate")?; + add_x509_data_usage(&mut total_binary_len, cert.len())?; + info.certificates.push(cert); } (Some(XMLDSIG_NS), "X509SubjectName") => { - info.subject_name_count += 1; + ensure_no_element_children(child, "X509SubjectName")?; + ensure_x509_data_entry_budget(&info)?; + let subject_name = collect_text_content_bounded( + child, + MAX_X509_SUBJECT_NAME_TEXT_LEN, + "X509SubjectName", + )?; + info.subject_names.push(subject_name); } (Some(XMLDSIG_NS), "X509IssuerSerial") => { - info.issuer_serial_count += 1; + ensure_x509_data_entry_budget(&info)?; + let issuer_serial = parse_x509_issuer_serial(child)?; + info.issuer_serials.push(issuer_serial); } (Some(XMLDSIG_NS), "X509SKI") => { - info.ski_count += 1; + ensure_no_element_children(child, "X509SKI")?; + ensure_x509_data_entry_budget(&info)?; + let ski = decode_x509_base64(child, "X509SKI")?; + add_x509_data_usage(&mut total_binary_len, ski.len())?; + info.skis.push(ski); } (Some(XMLDSIG_NS), "X509CRL") => { - info.crl_count += 1; + ensure_no_element_children(child, "X509CRL")?; + ensure_x509_data_entry_budget(&info)?; + let crl = decode_x509_base64(child, "X509CRL")?; + add_x509_data_usage(&mut total_binary_len, crl.len())?; + info.crls.push(crl); } (Some(XMLDSIG11_NS), "X509Digest") => { - info.digest_count += 1; + ensure_no_element_children(child, "X509Digest")?; + ensure_x509_data_entry_budget(&info)?; + let algorithm = required_algorithm_attr(child, "X509Digest")?; + let digest = decode_x509_base64(child, "X509Digest")?; + add_x509_data_usage(&mut total_binary_len, digest.len())?; + info.digests.push((algorithm.to_string(), digest)); } (Some(XMLDSIG_NS), child_name) | (Some(XMLDSIG11_NS), child_name) => { return Err(ParseError::InvalidStructure(format!( @@ -520,6 +555,136 @@ fn parse_x509_data_dispatch(node: Node) -> Result { Ok(info) } +fn ensure_x509_data_entry_budget(info: &X509DataInfo) -> Result<(), ParseError> { + let total_entries = info.certificates.len() + + info.subject_names.len() + + info.issuer_serials.len() + + info.skis.len() + + info.crls.len() + + info.digests.len(); + if total_entries >= MAX_X509_DATA_ENTRY_COUNT { + return Err(ParseError::InvalidStructure( + "X509Data contains too many entries".into(), + )); + } + Ok(()) +} + +fn add_x509_data_usage(total_binary_len: &mut usize, delta: usize) -> Result<(), ParseError> { + *total_binary_len = total_binary_len.checked_add(delta).ok_or_else(|| { + ParseError::InvalidStructure("X509Data exceeds maximum allowed total binary length".into()) + })?; + if *total_binary_len > MAX_X509_DATA_TOTAL_BINARY_LEN { + return Err(ParseError::InvalidStructure( + "X509Data exceeds maximum allowed total binary length".into(), + )); + } + Ok(()) +} + +fn decode_x509_base64( + node: Node<'_, '_>, + element_name: &'static str, +) -> Result, ParseError> { + use base64::Engine; + use base64::engine::general_purpose::STANDARD; + + let mut cleaned = String::new(); + let mut raw_text_len = 0usize; + for text in node + .children() + .filter(|child| child.is_text()) + .filter_map(|child| child.text()) + { + if raw_text_len.saturating_add(text.len()) > MAX_X509_BASE64_TEXT_LEN { + return Err(ParseError::InvalidStructure(format!( + "{element_name} exceeds maximum allowed text length" + ))); + } + raw_text_len = raw_text_len.saturating_add(text.len()); + normalize_xml_base64_text(text, &mut cleaned).map_err(|err| { + ParseError::Base64(format!( + "invalid XML whitespace U+{:04X} in {element_name}", + err.invalid_byte + )) + })?; + if cleaned.len() > MAX_X509_BASE64_NORMALIZED_LEN { + return Err(ParseError::InvalidStructure(format!( + "{element_name} exceeds maximum allowed base64 length" + ))); + } + } + + let decoded = STANDARD + .decode(&cleaned) + .map_err(|e| ParseError::Base64(format!("{element_name}: {e}")))?; + if decoded.is_empty() { + return Err(ParseError::InvalidStructure(format!( + "{element_name} must not be empty" + ))); + } + if decoded.len() > MAX_X509_DECODED_BINARY_LEN { + return Err(ParseError::InvalidStructure(format!( + "{element_name} exceeds maximum allowed binary length" + ))); + } + Ok(decoded) +} + +fn parse_x509_issuer_serial(node: Node<'_, '_>) -> Result<(String, String), ParseError> { + verify_ds_element(node, "X509IssuerSerial")?; + ensure_no_non_whitespace_text(node, "X509IssuerSerial")?; + + let children = element_children(node).collect::>(); + if children.len() != 2 { + return Err(ParseError::InvalidStructure( + "X509IssuerSerial must contain exactly X509IssuerName then X509SerialNumber".into(), + )); + } + if !matches!( + ( + children[0].tag_name().namespace(), + children[0].tag_name().name() + ), + (Some(XMLDSIG_NS), "X509IssuerName") + ) { + return Err(ParseError::InvalidStructure( + "X509IssuerSerial must contain X509IssuerName as the first child element".into(), + )); + } + if !matches!( + ( + children[1].tag_name().namespace(), + children[1].tag_name().name() + ), + (Some(XMLDSIG_NS), "X509SerialNumber") + ) { + return Err(ParseError::InvalidStructure( + "X509IssuerSerial must contain X509SerialNumber as the second child element".into(), + )); + } + + let issuer_node = children[0]; + ensure_no_element_children(issuer_node, "X509IssuerName")?; + let issuer_name = + collect_text_content_bounded(issuer_node, MAX_X509_ISSUER_NAME_TEXT_LEN, "X509IssuerName")?; + + let serial_node = children[1]; + ensure_no_element_children(serial_node, "X509SerialNumber")?; + let serial_number = collect_text_content_bounded( + serial_node, + MAX_X509_SERIAL_NUMBER_TEXT_LEN, + "X509SerialNumber", + )?; + if issuer_name.trim().is_empty() || serial_number.trim().is_empty() { + return Err(ParseError::InvalidStructure( + "X509IssuerSerial requires non-empty X509IssuerName and X509SerialNumber".into(), + )); + } + + Ok((issuer_name, serial_number)) +} + /// Base64-decode a digest value string, stripping whitespace. /// /// XMLDSig allows whitespace within base64 content (line-wrapped encodings). @@ -782,8 +947,15 @@ mod tests { - MIIB + AQID CN=Example + + CN=CA + 42 + + AQIDBA== + BAUGBw== + CAkK AQIDBA== "#; @@ -803,12 +975,15 @@ mod tests { assert_eq!( key_info.sources[2], KeyInfoSource::X509Data(X509DataInfo { - certificate_count: 1, - subject_name_count: 1, - issuer_serial_count: 0, - ski_count: 0, - crl_count: 0, - digest_count: 0, + certificates: vec![vec![1, 2, 3]], + subject_names: vec!["CN=Example".into()], + issuer_serials: vec![("CN=CA".into(), "42".into())], + skis: vec![vec![1, 2, 3, 4]], + crls: vec![vec![4, 5, 6, 7]], + digests: vec![( + "http://www.w3.org/2001/04/xmlenc#sha256".into(), + vec![8, 9, 10] + )], }) ); assert_eq!( @@ -881,6 +1056,190 @@ mod tests { assert!(matches!(err, ParseError::InvalidStructure(_))); } + #[test] + fn parse_key_info_rejects_x509_issuer_serial_without_required_children() { + let xml = r#" + + + CN=CA + + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_x509_issuer_serial_with_duplicate_issuer_name() { + let xml = r#" + + + CN=CA-1 + CN=CA-2 + 42 + + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_x509_issuer_serial_with_duplicate_serial_number() { + let xml = r#" + + + CN=CA + 1 + 2 + + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_x509_issuer_serial_with_whitespace_only_values() { + let xml = r#" + + + + + + + + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_x509_issuer_serial_with_wrong_child_order() { + let xml = r#" + + + 42 + CN=CA + + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_x509_issuer_serial_with_extra_child_element() { + let xml = r#" + + + CN=CA + 42 + + + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_x509_digest_without_algorithm() { + let xml = r#" + + AQID + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_invalid_x509_certificate_base64() { + let xml = r#" + + %%%invalid%%% + + "#; + let doc = Document::parse(xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::Base64(_))); + } + + #[test] + fn parse_key_info_rejects_x509_data_exceeding_entry_budget() { + let subjects = (0..(MAX_X509_DATA_ENTRY_COUNT + 1)) + .map(|idx| format!("CN={idx}")) + .collect::>() + .join(""); + let xml = format!( + "{subjects}" + ); + let doc = Document::parse(&xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_rejects_x509_data_exceeding_total_binary_budget() { + let payload = base64::engine::general_purpose::STANDARD.encode(vec![0u8; 190_000]); + let certs = (0..6) + .map(|_| format!("{payload}")) + .collect::>() + .join(""); + let xml = format!( + "{certs}" + ); + let doc = Document::parse(&xml).unwrap(); + + let err = parse_key_info(doc.root_element()).unwrap_err(); + assert!(matches!(err, ParseError::InvalidStructure(_))); + } + + #[test] + fn parse_key_info_accepts_large_textual_x509_entries_within_entry_budget() { + let issuer_name = "C".repeat(MAX_X509_ISSUER_NAME_TEXT_LEN); + let serial_number = "7".repeat(MAX_X509_SERIAL_NUMBER_TEXT_LEN); + let issuer_serials = (0..52) + .map(|_| { + format!( + "{issuer_name}{serial_number}" + ) + }) + .collect::>() + .join(""); + let xml = format!( + "{issuer_serials}" + ); + let doc = Document::parse(&xml).unwrap(); + + let key_info = parse_key_info(doc.root_element()).unwrap(); + let parsed = match &key_info.sources[0] { + KeyInfoSource::X509Data(x509) => x509, + _ => panic!("expected X509Data source"), + }; + assert_eq!(parsed.issuer_serials.len(), 52); + } + #[test] fn parse_key_info_accepts_x509data_with_only_foreign_namespace_children() { let xml = r#"