Skip to content

Commit

Permalink
Add a mode to handle "pretty URLs", i.e. URIs without .html extension
Browse files Browse the repository at this point in the history
In many circumstances (GitHub Pages, Apache configured with MultiViews,
etc.), web servers process URIs by appending the `.html` file extension
when no file is found at the path specified by the URI but a `.html`
file corresponding to that path _is_ found.

To allow Lychee to use the fast, offline method of checking such files
locally via the `file://` scheme, let's handle this scenario gracefully
by adding the `--auto-append-html-fileext` option.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
  • Loading branch information
dscho committed May 11, 2024
1 parent 0a54079 commit c2b561e
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 6 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,10 @@ Options:
--remap <REMAP>
Remap URI matching pattern to different URI
--auto-append-html-fileext
Automatically append `.html` to `file://` URLs when no file could be found
at the specified `path`.
--header <HEADER>
Custom request header
Expand Down
10 changes: 10 additions & 0 deletions fixtures/pretty-urls/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>For Testing pretty URLs</title>
</head>
<body>
<a href="other">other</a>
</body>
</html>
10 changes: 10 additions & 0 deletions fixtures/pretty-urls/other.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>For Testing pretty URLs</title>
</head>
<body>
<a href="index">index</a>
</body>
</html>
1 change: 1 addition & 0 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
.require_https(cfg.require_https)
.cookie_jar(cookie_jar.cloned())
.include_fragments(cfg.include_fragments)
.auto_append_html_fileext(cfg.auto_append_html_fileext)
.build()
.client()
.context("Failed to create request client")
Expand Down
10 changes: 10 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,15 @@ pub(crate) struct Config {
#[arg(long)]
pub(crate) remap: Vec<String>,

/// Automatically append `.html` to `file://` URIs as needed
#[serde(default)]
#[arg(
long,
long_help = "Automatically append `.html` to `file://` URLs when no file could be found
at the specified `path`."
)]
pub(crate) auto_append_html_fileext: bool,

/// Custom request header
#[arg(long)]
#[serde(default)]
Expand Down Expand Up @@ -439,6 +448,7 @@ impl Config {
exclude_loopback: false;
exclude_mail: false;
remap: Vec::<String>::new();
auto_append_html_fileext: false;
header: Vec::<String>::new();
timeout: DEFAULT_TIMEOUT_SECS;
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
Expand Down
13 changes: 13 additions & 0 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1556,4 +1556,17 @@ mod cli {
// 3 failures because of missing fragments
.stdout(contains("3 Errors"));
}

/// Checks that "pretty URLs" resolve when `--auto-append-html-fileext` is set.
///
/// The `pretty-urls` fixture directory holds `index.html` and `other.html`,
/// which link to each other as `other` and `index` (no `.html` extension).
/// With the flag enabled the run is expected to succeed and report
/// `0 Errors` on stdout.
#[test]
fn test_pretty_urls() {
    let pretty_urls_dir = fixtures_path().join("pretty-urls");
    let mut lychee = main_command();

    // Point lychee at the fixture directory with the flag under test enabled.
    lychee
        .arg("--verbose")
        .arg("--auto-append-html-fileext")
        .arg(pretty_urls_dir);

    lychee.assert().success().stdout(contains("0 Errors"));
}
}
30 changes: 24 additions & 6 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ pub struct ClientBuilder {
/// make sure rules don't conflict with each other.
remaps: Option<Remaps>,

/// Automatically append `.html` file extensions to `file://` URIs as needed
auto_append_html_fileext: bool,

/// Links matching this set of regular expressions are **always** checked.
///
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
Expand Down Expand Up @@ -384,6 +387,7 @@ impl ClientBuilder {
reqwest_client,
github_client,
remaps: self.remaps,
auto_append_html_fileext: self.auto_append_html_fileext,
filter,
max_retries: self.max_retries,
retry_wait_time: self.retry_wait_time,
Expand Down Expand Up @@ -412,6 +416,9 @@ pub struct Client {
/// Optional remapping rules for URIs matching pattern.
remaps: Option<Remaps>,

/// Automatically append `.html` file extensions to `file://` URIs as needed
auto_append_html_fileext: bool,

/// Rules to decide whether each link should be checked or ignored.
filter: Filter,

Expand Down Expand Up @@ -654,14 +661,25 @@ impl Client {
let Ok(path) = uri.url.to_file_path() else {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};
if !path.exists() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}
if self.include_fragments {
self.check_fragment(&path, uri).await
if path.exists() {
if self.include_fragments {
return self.check_fragment(&path, uri).await;
}
} else {
Status::Ok(StatusCode::OK)
// if the path does not end in `.html`, try to append it
if !self.auto_append_html_fileext || path.ends_with(".html") {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}
// append `.html` and try again
let mut path_buf = path.to_path_buf();
path_buf.set_extension("html");
if !path_buf.exists() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
} else if self.include_fragments {
return self.check_fragment(&path_buf, uri).await;
}
}
return Status::Ok(StatusCode::OK);
}

/// Checks a `file` URI's fragment.
Expand Down

0 comments on commit c2b561e

Please sign in to comment.