Skip to content

Commit 114862a

Browse files
authored
fix: make sure to deal with malformed UTF-8 in file names (#374)
1 parent 7d61c91 commit 114862a

File tree

2 files changed

+61
-89
lines changed

2 files changed

+61
-89
lines changed

src/directory_listing.rs

Lines changed: 41 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ use headers::{ContentLength, ContentType, HeaderMapExt};
1313
use humansize::FormatSize;
1414
use hyper::{Body, Method, Response, StatusCode};
1515
use mime_guess::mime;
16-
use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, NON_ALPHANUMERIC};
16+
use percent_encoding::{percent_decode_str, percent_encode, AsciiSet, NON_ALPHANUMERIC};
1717
use serde::{Serialize, Serializer};
18+
use std::ffi::{OsStr, OsString};
1819
use std::future::Future;
1920
use std::io;
2021
use std::path::Path;
@@ -152,16 +153,15 @@ enum FileType {
152153
/// Defines a file entry and its properties.
153154
#[derive(Serialize)]
154155
struct FileEntry {
155-
name: String,
156-
#[serde(skip_serializing)]
157-
name_encoded: String,
156+
#[serde(serialize_with = "serialize_name")]
157+
name: OsString,
158158
#[serde(serialize_with = "serialize_mtime")]
159159
mtime: Option<DateTime<Local>>,
160160
#[serde(skip_serializing_if = "Option::is_none")]
161161
size: Option<u64>,
162162
r#type: FileType,
163163
#[serde(skip_serializing)]
164-
uri: Option<String>,
164+
uri: String,
165165
}
166166

167167
impl FileEntry {
@@ -205,36 +205,19 @@ fn read_dir_entries(
205205
}
206206
};
207207

208-
// FIXME: handle non-Unicode file names properly via OsString
209-
let name = match dir_entry
210-
.file_name()
211-
.into_string()
212-
.map_err(|err| anyhow::anyhow!(err.into_string().unwrap_or_default()))
213-
{
214-
Ok(s) => s,
215-
Err(err) => {
216-
tracing::error!(
217-
"unable to resolve name for file or directory entry (skipped): {:?}",
218-
err
219-
);
220-
continue;
221-
}
222-
};
208+
let name = dir_entry.file_name();
223209

224210
// Check and ignore the current hidden file/directory (dotfile) if feature enabled
225-
if ignore_hidden_files && name.starts_with('.') {
211+
if ignore_hidden_files && name.as_encoded_bytes().first().is_some_and(|c| *c == b'.') {
226212
continue;
227213
}
228214

229-
let mut name_encoded = utf8_percent_encode(&name, PERCENT_ENCODE_SET).to_string();
230-
let mut size = None;
231-
232-
if meta.is_dir() {
233-
name_encoded.push('/');
215+
let (r#type, size) = if meta.is_dir() {
234216
dirs_count += 1;
217+
(FileType::Directory, None)
235218
} else if meta.is_file() {
236-
size = Some(meta.len());
237219
files_count += 1;
220+
(FileType::File, Some(meta.len()))
238221
} else if meta.file_type().is_symlink() {
239222
// NOTE: we resolve the symlink path below to just know if is a directory or not.
240223
// Hwever, we are still showing the symlink name but not the resolved name.
@@ -246,7 +229,7 @@ fn read_dir_entries(
246229
tracing::error!(
247230
"unable to resolve `{}` symlink path (skipped): {:?}",
248231
symlink.display(),
249-
err
232+
err,
250233
);
251234
continue;
252235
}
@@ -258,59 +241,43 @@ fn read_dir_entries(
258241
tracing::error!(
259242
"unable to resolve metadata for `{}` symlink (skipped): {:?}",
260243
symlink.display(),
261-
err
244+
err,
262245
);
263246
continue;
264247
}
265248
};
266249
if symlink_meta.is_dir() {
267-
name_encoded.push('/');
268250
dirs_count += 1;
251+
(FileType::Directory, None)
269252
} else {
270-
size = Some(meta.len());
271253
files_count += 1;
254+
(FileType::File, Some(symlink_meta.len()))
272255
}
273256
} else {
274257
continue;
275-
}
258+
};
259+
260+
let name_encoded = percent_encode(name.as_encoded_bytes(), PERCENT_ENCODE_SET).to_string();
276261

277-
let mut uri = None;
278262
// NOTE: Use relative paths by default independently of
279263
// the "redirect trailing slash" feature.
280264
// However, when "redirect trailing slash" is disabled
281265
// and a request path doesn't contain a trailing slash then
282266
// entries should contain the "parent/entry-name" as a link format.
283267
// Otherwise, we just use the "entry-name" as a link (default behavior).
284268
// Note that in both cases, we add a trailing slash if the entry is a directory.
285-
if !base_path.ends_with('/') {
286-
let base_path = Path::new(base_path);
287-
let parent_dir = base_path.parent().unwrap_or(base_path);
288-
let mut base_dir = base_path;
289-
if base_path != parent_dir {
290-
base_dir = match base_path.strip_prefix(parent_dir) {
291-
Ok(v) => v,
292-
Err(err) => {
293-
tracing::error!(
294-
"unable to strip parent path prefix for `{}` (skipped): {:?}",
295-
base_path.display(),
296-
err
297-
);
298-
continue;
299-
}
300-
};
301-
}
302-
303-
let mut base_str = String::new();
304-
if !base_dir.starts_with("/") {
305-
let base_dir = base_dir.to_str().unwrap_or_default();
306-
if !base_dir.is_empty() {
307-
base_str.push_str(base_dir);
308-
}
309-
base_str.push('/');
310-
}
269+
let mut uri = if !base_path.ends_with('/') && !base_path.is_empty() {
270+
let parent = base_path
271+
.rsplit_once('/')
272+
.map(|(_, parent)| parent)
273+
.unwrap_or(base_path);
274+
format!("{parent}/{name_encoded}")
275+
} else {
276+
name_encoded
277+
};
311278

312-
base_str.push_str(&name_encoded);
313-
uri = Some(base_str);
279+
if r#type == FileType::Directory {
280+
uri.push('/');
314281
}
315282

316283
let mtime = match parse_last_modified(meta.modified()?) {
@@ -320,15 +287,9 @@ fn read_dir_entries(
320287
None
321288
}
322289
};
323-
let r#type = if meta.is_dir() {
324-
FileType::Directory
325-
} else {
326-
FileType::File
327-
};
328290

329291
file_entries.push(FileEntry {
330292
name,
331-
name_encoded,
332293
mtime,
333294
size,
334295
r#type,
@@ -379,7 +340,7 @@ fn read_dir_entries(
379340
files_count,
380341
&mut file_entries,
381342
order_code,
382-
)?
343+
)
383344
}
384345
};
385346

@@ -403,6 +364,11 @@ fn json_auto_index(entries: &mut [FileEntry], order_code: u8) -> Result<String>
403364
Ok(serde_json::to_string(entries)?)
404365
}
405366

367+
/// Serialize FileEntry::name
368+
fn serialize_name<S: Serializer>(name: &OsStr, serializer: S) -> Result<S::Ok, S::Error> {
369+
serializer.serialize_str(&name.to_string_lossy())
370+
}
371+
406372
/// Serialize FileEntry::mtime field
407373
fn serialize_mtime<S: Serializer>(
408374
mtime: &Option<DateTime<Local>>,
@@ -425,13 +391,13 @@ fn html_auto_index<'a>(
425391
files_count: usize,
426392
entries: &'a mut [FileEntry],
427393
order_code: u8,
428-
) -> Result<String> {
394+
) -> String {
429395
use maud::{html, DOCTYPE};
430396

431397
let sort_attrs = sort_file_entries(entries, order_code);
432-
let current_path = percent_decode_str(base_path).decode_utf8()?.to_string();
398+
let current_path = percent_decode_str(base_path).decode_utf8_lossy();
433399

434-
Ok(html! {
400+
html! {
435401
(DOCTYPE)
436402
html {
437403
head {
@@ -490,8 +456,8 @@ fn html_auto_index<'a>(
490456
@for entry in entries {
491457
tr {
492458
td {
493-
a href=(entry.uri.as_ref().unwrap_or(&entry.name_encoded)) {
494-
(entry.name)
459+
a href=(entry.uri) {
460+
(entry.name.to_string_lossy())
495461
@if entry.is_dir() {
496462
"/"
497463
}
@@ -519,7 +485,7 @@ fn html_auto_index<'a>(
519485
}
520486
}
521487
}
522-
}.into())
488+
}.into()
523489
}
524490

525491
/// Sort a list of file entries by a specific order code.
@@ -532,7 +498,7 @@ fn sort_file_entries(files: &mut [FileEntry], order_code: u8) -> SortingAttr<'_>
532498
match order_code {
533499
0 | 1 => {
534500
// Name (asc, desc)
535-
files.sort_by_cached_key(|f| f.name.to_lowercase());
501+
files.sort_by_cached_key(|f| f.name.to_string_lossy().to_lowercase());
536502
if order_code == 1 {
537503
files.reverse();
538504
} else {

src/file_path.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,7 @@
77
88
use hyper::StatusCode;
99
use percent_encoding::percent_decode_str;
10-
use std::{
11-
borrow::Cow,
12-
path::{Component, Path, PathBuf},
13-
};
10+
use std::path::{Component, Path, PathBuf};
1411

1512
/// SWS Path extensions trait.
1613
pub(crate) trait PathExt {
@@ -30,20 +27,29 @@ impl PathExt for Path {
3027
}
3128
}
3229

33-
fn decode_tail_path(tail: &str) -> Result<Cow<'_, str>, StatusCode> {
34-
match percent_decode_str(tail.trim_start_matches('/')).decode_utf8() {
35-
Ok(p) => Ok(p),
36-
Err(err) => {
37-
tracing::debug!("dir: failed to decode route={:?}: {:?}", tail, err);
38-
Err(StatusCode::UNSUPPORTED_MEDIA_TYPE)
39-
}
40-
}
30+
#[cfg(unix)]
31+
fn path_from_bytes(bytes: &[u8]) -> PathBuf {
32+
use std::ffi::OsStr;
33+
use std::os::unix::ffi::OsStrExt;
34+
35+
OsStr::from_bytes(bytes).into()
36+
}
37+
38+
#[cfg(windows)]
39+
fn path_from_bytes(bytes: &[u8]) -> PathBuf {
40+
// This should really be OsStr::from_encoded_bytes_unchecked() but it’s
41+
// unsafe. With this fallback non-Unicode file names will result in 404.
42+
String::from_utf8_lossy(bytes).into_owned().into()
43+
}
44+
45+
fn decode_tail_path(tail: &str) -> PathBuf {
46+
let bytes = percent_decode_str(tail.trim_start_matches('/')).collect::<Vec<_>>();
47+
path_from_bytes(&bytes)
4148
}
4249

4350
/// Sanitizes a base/tail paths and then it returns an unified one.
4451
pub(crate) fn sanitize_path(base: &Path, tail: &str) -> Result<PathBuf, StatusCode> {
45-
let path_decoded = decode_tail_path(tail)?;
46-
let path_decoded = Path::new(&*path_decoded);
52+
let path_decoded = decode_tail_path(tail);
4753
let mut full_path = base.to_path_buf();
4854
tracing::trace!("dir: base={:?}, route={:?}", full_path, path_decoded);
4955

0 commit comments

Comments
 (0)