Skip to content

Commit

Permalink
Improve 'Content-Disposition' header parsing.
Browse files Browse the repository at this point in the history
This commit makes the following improvements:

  * `name` and `filename` are allowed in any order
  * Escaped quotes are recognized and unescaped
  * Whitespace surrounding value is allowed and removed

Closes #53
Resolves #57

Co-authored-by: Filippo Berto <berto.f@protonmail.com>
  • Loading branch information
SergioBenitez and bertof committed Dec 14, 2023
1 parent da2222c commit bca313f
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 47 deletions.
154 changes: 111 additions & 43 deletions src/constants.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::borrow::Cow;

pub(crate) const DEFAULT_WHOLE_STREAM_SIZE_LIMIT: u64 = std::u64::MAX;
pub(crate) const DEFAULT_PER_FIELD_SIZE_LIMIT: u64 = std::u64::MAX;

Expand All @@ -15,35 +17,60 @@ pub(crate) enum ContentDispositionAttr {
FileName,
}

fn trim_ascii_ws_start(bytes: &[u8]) -> &[u8] {
bytes
.iter()
.position(|b| !b.is_ascii_whitespace())
.map_or_else(|| &bytes[bytes.len()..], |i| &bytes[i..])
}

fn trim_ascii_ws_then(bytes: &[u8], char: u8) -> Option<&[u8]> {
match trim_ascii_ws_start(bytes) {
[first, rest @ ..] if *first == char => Some(rest),
_ => None,
}
}

impl ContentDispositionAttr {
/// Extract ContentDisposition Attribute from header.
///
/// Some older clients may not quote the name or filename, so we allow them
pub fn extract_from<'h>(&self, header: &'h [u8]) -> Option<&'h [u8]> {
/// Some older clients may not quote the name or filename, so we allow them,
/// but require them to be percent encoded. Only allocates if percent
/// decoding, and there are characters that need to be decoded.
pub fn extract_from<'h>(&self, mut header: &'h [u8]) -> Option<Cow<'h, str>> {
let prefix = match self {
ContentDispositionAttr::Name => &b"name="[..],
ContentDispositionAttr::FileName => &b"filename="[..],
ContentDispositionAttr::Name => &b"name"[..],
ContentDispositionAttr::FileName => &b"filename"[..],
};

if let Some(i) = memchr::memmem::find(header, prefix) {
// Check if this is malformed, with `filename` coming first.
if *self == ContentDispositionAttr::Name && i > 0 && header[i - 1] == b'e' {
return None;
while let Some(i) = memchr::memmem::find(header, prefix) {
// Check if we found a superstring of `prefix`; continue if so.
let suffix = &header[(i + prefix.len())..];
if i > 0 && !(header[i - 1].is_ascii_whitespace() || header[i - 1] == b';') {
header = suffix;
continue;
}

// Handle quoted strings first, then unquoted string.
// FIXME: According to RFC6266 4.1, a 'quoted-string' (RFC 2616 2.2)
// can contain a 'quoted-pair', which can be used to escape a quote
// character in a name with `\`. That is, "a\"b" is a valid name.
// But this routine would truncate it to `a\`; this is wrong.
let rest = &header[(i + prefix.len())..];
if rest.starts_with(b"\"") {
let k = memchr::memchr(b'"', &rest[1..])?;
return Some(&rest[1..(k + 1)]);
// Now find and trim the `=`. Handle quoted strings first.
let rest = trim_ascii_ws_then(suffix, b'=')?;
let (bytes, is_escaped) = if let Some(rest) = trim_ascii_ws_then(rest, b'"') {
let (mut k, mut escaped) = (memchr::memchr(b'"', rest)?, false);
while k > 0 && rest[k - 1] == b'\\' {
escaped = true;
k = k + 1 + memchr::memchr(b'"', &rest[(k + 1)..])?;
}

(&rest[..k], escaped)
} else {
let j = memchr::memchr(b';', rest).unwrap_or(rest.len());
return Some(&rest[..j]);
}
let rest = trim_ascii_ws_start(rest);
let j = memchr::memchr2(b';', b' ', rest).unwrap_or(rest.len());
(&rest[..j], false)
};

return match std::str::from_utf8(bytes).ok()? {
name if is_escaped => Some(name.replace(r#"\""#, "\"").into()),
name => Some(name.into()),
};
}

None
Expand All @@ -59,7 +86,25 @@ mod tests {
let val = br#"form-data; name="my_field""#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b"my_field");
assert_eq!(name.unwrap(), "my_field");
assert!(filename.is_none());

let val = br#"form-data; name=my_field "#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), "my_field");
assert!(filename.is_none());

let val = br#"form-data; name = my_field "#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), "my_field");
assert!(filename.is_none());

let val = br#"form-data; name = "#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), "");
assert!(filename.is_none());
}

Expand All @@ -68,20 +113,20 @@ mod tests {
let val = br#"form-data; name="my_field"; filename="file abc.txt""#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b"my_field");
assert_eq!(filename.unwrap(), b"file abc.txt");
assert_eq!(name.unwrap(), "my_field");
assert_eq!(filename.unwrap(), "file abc.txt");

let val = "form-data; name=\"你好\"; filename=\"file abc.txt\"".as_bytes();
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), "你好".as_bytes());
assert_eq!(filename.unwrap(), b"file abc.txt");
assert_eq!(name.unwrap(), "你好");
assert_eq!(filename.unwrap(), "file abc.txt");

let val = "form-data; name=\"কখগ\"; filename=\"你好.txt\"".as_bytes();
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), "কখগ".as_bytes());
assert_eq!(filename.unwrap(), "你好.txt".as_bytes());
assert_eq!(name.unwrap(), "কখগ");
assert_eq!(filename.unwrap(), "你好.txt");
}

#[test]
Expand All @@ -91,64 +136,87 @@ mod tests {
let val = br#"form-data; filename="file-name.txt""#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(filename.unwrap(), b"file-name.txt");
assert_eq!(filename.unwrap(), "file-name.txt");
assert!(name.is_none());

let val = "form-data; filename=\"কখগ-你好.txt\"".as_bytes();
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(filename.unwrap(), "কখগ-你好.txt".as_bytes());
assert_eq!(filename.unwrap(), "কখগ-你好.txt");
assert!(name.is_none());
}

#[test]
fn test_content_distribution_misordered_fields() {
let val = br#"form-data; filename=file-name.txt; name=file"#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(filename.unwrap(), "file-name.txt");
assert_eq!(name.unwrap(), "file");

let val = br#"form-data; filename="file-name.txt"; name="file""#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(filename.unwrap(), "file-name.txt");
assert_eq!(name.unwrap(), "file");

let val = "form-data; filename=\"你好.txt\"; name=\"কখগ\"".as_bytes();
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), "কখগ");
assert_eq!(filename.unwrap(), "你好.txt");
}

#[test]
fn test_content_disposition_name_unquoted() {
let val = br#"form-data; name=my_field"#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b"my_field");
assert_eq!(name.unwrap(), "my_field");
assert!(filename.is_none());

let val = br#"form-data; name=my_field; filename=file-name.txt"#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b"my_field");
assert_eq!(filename.unwrap(), b"file-name.txt");
assert_eq!(name.unwrap(), "my_field");
assert_eq!(filename.unwrap(), "file-name.txt");
}

#[test]
fn test_content_disposition_name_quoted() {
let val = br#"form-data; name="my;f;ield""#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b"my;f;ield");
assert_eq!(name.unwrap(), "my;f;ield");
assert!(filename.is_none());

let val = br#"form-data; name=my_field; filename="file;name.txt""#;
let val = br#"form-data; name=my_field; filename = "file;name.txt""#;
let name = ContentDispositionAttr::Name.extract_from(val);
assert_eq!(name.unwrap(), "my_field");
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b"my_field");
assert_eq!(filename.unwrap(), b"file;name.txt");
assert_eq!(filename.unwrap(), "file;name.txt");

let val = br#"form-data; name=; filename=filename.txt"#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b"");
assert_eq!(filename.unwrap(), b"filename.txt");
assert_eq!(name.unwrap(), "");
assert_eq!(filename.unwrap(), "filename.txt");

let val = br#"form-data; name=";"; filename=";""#;
let name = ContentDispositionAttr::Name.extract_from(val);
let filename = ContentDispositionAttr::FileName.extract_from(val);
assert_eq!(name.unwrap(), b";");
assert_eq!(filename.unwrap(), b";");
assert_eq!(name.unwrap(), ";");
assert_eq!(filename.unwrap(), ";");
}

// FIXME: This test should pass.
#[test]
#[should_panic]
fn test_content_disposition_name_escaped_quote() {
let val = br#"form-data; name="my\"field\"name""#;
let name = ContentDispositionAttr::Name.extract_from(val);
assert_eq!(name.unwrap(), b"my\"field\"name");
assert_eq!(name.unwrap(), r#"my"field"name"#);

let val = br#"form-data; name="myfield\"name""#;
let name = ContentDispositionAttr::Name.extract_from(val);
assert_eq!(name.unwrap(), r#"myfield"name"#);
}
}
6 changes: 2 additions & 4 deletions src/content_disposition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ impl ContentDisposition {

let field_name = content_disposition
.and_then(|val| ContentDispositionAttr::Name.extract_from(val))
.and_then(|attr| std::str::from_utf8(attr).ok())
.map(String::from);
.map(|attr| attr.into_owned());

let file_name = content_disposition
.and_then(|val| ContentDispositionAttr::FileName.extract_from(val))
.and_then(|attr| std::str::from_utf8(attr).ok())
.map(String::from);
.map(|attr| attr.into_owned());

ContentDisposition { field_name, file_name }
}
Expand Down

0 comments on commit bca313f

Please sign in to comment.