From 19b0e613f6a9165edba365d2e7d00e03f83f8470 Mon Sep 17 00:00:00 2001 From: Denis Cornehl Date: Sun, 23 Nov 2025 16:48:56 +0100 Subject: [PATCH] re-encode original uri when extracting rustdoc params --- src/web/crate_details.rs | 30 +++++++++++++++++++++++++----- src/web/escaped_uri.rs | 32 ++++++++++++++++++++++++++++++-- src/web/extractors/rustdoc.rs | 22 ++++++++++------------ src/web/mod.rs | 4 ++++ src/web/routes.rs | 6 +++++- src/web/rustdoc.rs | 2 +- 6 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 540a31d61..797ebdc78 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -719,8 +719,8 @@ mod tests { use anyhow::Error; use kuchikiki::traits::TendrilSink; use pretty_assertions::assert_eq; - use reqwest::StatusCode; use std::collections::HashMap; + use test_case::test_case; async fn release_build_status( conn: &mut sqlx::PgConnection, @@ -2062,6 +2062,26 @@ mod tests { }); } + #[tokio::test(flavor = "multi_thread")] + #[test_case("/crate/rayon/^1.11.0", "/crate/rayon/1.11.0")] + #[test_case("/crate/rayon/%5E1.11.0", "/crate/rayon/1.11.0")] + #[test_case("/crate/rayon", "/crate/rayon/latest"; "without trailing slash")] + #[test_case("/crate/rayon/", "/crate/rayon/latest")] + async fn test_version_redirects(path: &str, expected_target: &str) -> anyhow::Result<()> { + let env = TestEnvironment::new().await?; + env.fake_release() + .await + .name("rayon") + .version("1.11.0") + .create() + .await?; + let web = env.web_app().await; + + web.assert_redirect(path, expected_target).await?; + + Ok(()) + } + #[test] fn readme() { async_wrapper(|env| async move { @@ -2199,10 +2219,10 @@ path = "src/lib.rs" .create() .await?; - assert_eq!( - env.web_app().await.get("/crate/dummy%3E").await?.status(), - StatusCode::FOUND - ); + env.web_app() + .await + .assert_redirect_unchecked("/crate/dummy%3E", "/crate/dummy%3E/latest") + .await?; Ok(()) }) diff --git a/src/web/escaped_uri.rs b/src/web/escaped_uri.rs index 6512ce58b..a64f76808 100644 --- a/src/web/escaped_uri.rs +++ b/src/web/escaped_uri.rs @@ -1,4 +1,4 @@ -use crate::web::encode_url_path; +use crate::web::{encode_url_path, url_decode}; use askama::filters::HtmlSafe; use http::{Uri, uri::PathAndQuery}; use std::{borrow::Borrow, fmt::Display, iter, str::FromStr}; @@ -8,6 +8,8 @@ use url::form_urlencoded; /// /// Ensures that the path part is always properly percent-encoded, including some characters /// that http::Uri would allow, but we still want to encode, like umlauts. +/// Also ensures that some characters are _not_ encoded that sometimes arrive percent-encoded +/// from browsers, so we then can easily compare URIs, knowing they are encoded the same way. /// /// Also we support fragments, with http::Uri doesn't support yet. /// See https://github.com/hyperium/http/issues/775 @@ -20,7 +22,16 @@ pub struct EscapedURI { impl EscapedURI { pub fn from_uri(uri: Uri) -> Self { if uri.path_and_query().is_some() { - let encoded_path = encode_url_path(uri.path()); + let encoded_path = encode_url_path( + // we re-encode the path so we know all EscapedURI instances are comparable and + // encoded the same way. + // Example: "^" is not escaped when axum generates an Uri, we also didn't do it + // for a long time so we have nicers URLs with caret, since it's supported by + // most browsers to be shown in the URL bar. + // But: the actual request will have it encoded, which means the `Uri` + // we get from axum when handling the request will have it encoded. + &url_decode(uri.path()).expect("was in Uri, so has to have been correct"), + ); if uri.path() == encoded_path { Self { uri, @@ -223,6 +234,22 @@ impl From for EscapedURI { } } +impl TryFrom for EscapedURI { + type Error = http::uri::InvalidUri; + + fn try_from(value: String) -> Result { + value.parse() + } +} + +impl TryFrom<&str> for EscapedURI { + type Error = http::uri::InvalidUri; + + fn try_from(value: &str) -> Result { + value.parse() + } +} + impl PartialEq for &EscapedURI { fn eq(&self, other: &String) -> bool { *self == other @@ -286,6 +313,7 @@ mod tests { } #[test_case("/something" => "/something"; "plain path")] + #[test_case("/semver/%5E1.2.3" => "/semver/^1.2.3"; "we encode less")] #[test_case("/somethingäöü" => "/something%C3%A4%C3%B6%C3%BC"; "path with umlauts")] fn test_escaped_uri_encodes_path_from_uri(path: &str) -> String { let uri: Uri = path.parse().unwrap(); diff --git a/src/web/extractors/rustdoc.rs b/src/web/extractors/rustdoc.rs index 9b6c64682..6b2a60970 100644 --- a/src/web/extractors/rustdoc.rs +++ b/src/web/extractors/rustdoc.rs @@ -4,7 +4,7 @@ use crate::{ db::BuildId, web::{ MatchedRelease, MetaData, ReqVersion, error::AxumNope, escaped_uri::EscapedURI, - extractors::Path, + extractors::Path, url_decode, }, }; use anyhow::Result; @@ -15,7 +15,6 @@ use axum::{ }; use itertools::Itertools as _; use serde::Deserialize; -use std::borrow::Cow; const INDEX_HTML: &str = "index.html"; const FOLDER_AND_INDEX_HTML: &str = "/index.html"; @@ -48,7 +47,7 @@ pub(crate) struct RustdocParams { // optional behaviour marker page_kind: Option, - original_uri: Option, + original_uri: Option, name: String, req_version: ReqVersion, doc_target: Option, @@ -277,13 +276,16 @@ impl RustdocParams { }) } - pub(crate) fn original_uri(&self) -> Option<&Uri> { + pub(crate) fn original_uri(&self) -> Option<&EscapedURI> { self.original_uri.as_ref() } - pub(crate) fn with_original_uri(self, original_uri: impl Into) -> Self { + pub(crate) fn with_original_uri(self, original_uri: impl Into) -> Self { self.with_maybe_original_uri(Some(original_uri)) } - pub(crate) fn with_maybe_original_uri(self, original_uri: Option>) -> Self { + pub(crate) fn with_maybe_original_uri( + self, + original_uri: Option>, + ) -> Self { self.update(|mut params| { params.original_uri = original_uri.map(Into::into); params @@ -292,7 +294,7 @@ impl RustdocParams { #[cfg(test)] pub(crate) fn try_with_original_uri(self, original_uri: V) -> Result where - V: TryInto, + V: TryInto, V::Error: std::error::Error + Send + Sync + 'static, { use anyhow::Context as _; @@ -711,10 +713,6 @@ fn get_file_extension(path: &str) -> Option<&str> { }) } -fn url_decode<'a>(input: &'a str) -> Result> { - Ok(percent_encoding::percent_decode(input.as_bytes()).decode_utf8()?) -} - fn generate_rustdoc_url(name: &str, version: &ReqVersion, path: &str) -> EscapedURI { EscapedURI::from_path(format!("/{}/{}/{}", name, version, path)) } @@ -1148,7 +1146,7 @@ mod tests { .with_req_version(ReqVersion::Latest) .with_maybe_doc_target(target) .with_maybe_inner_path(path) - .try_with_original_uri(&dummy_path) + .try_with_original_uri(&dummy_path[..]) .unwrap() .with_default_target(DEFAULT_TARGET) .with_target_name(KRATE) diff --git a/src/web/mod.rs b/src/web/mod.rs index f4492d60b..0c0c8d13a 100644 --- a/src/web/mod.rs +++ b/src/web/mod.rs @@ -80,6 +80,10 @@ pub(crate) fn encode_url_path(path: &str) -> String { utf8_percent_encode(path, PATH).to_string() } +pub(crate) fn url_decode<'a>(input: &'a str) -> Result> { + Ok(percent_encoding::percent_decode(input.as_bytes()).decode_utf8()?) +} + const DEFAULT_BIND: SocketAddr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 3000); /// Represents a version identifier in a request in the original state. diff --git a/src/web/routes.rs b/src/web/routes.rs index a69bbb126..ba3924f83 100644 --- a/src/web/routes.rs +++ b/src/web/routes.rs @@ -193,10 +193,14 @@ pub(super) fn build_axum_routes() -> AxumRouter { "/releases/failures/{page}", get_internal(super::releases::releases_failures_by_stars_handler), ) - .route_with_tsr( + .route( "/crate/{name}", get_internal(super::crate_details::crate_details_handler), ) + .route( + "/crate/{name}/", + get_internal(super::crate_details::crate_details_handler), + ) .route_with_tsr( "/crate/{name}/{version}", get_internal(super::crate_details::crate_details_handler), diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs index 32d2969ab..802e845f2 100644 --- a/src/web/rustdoc.rs +++ b/src/web/rustdoc.rs @@ -220,7 +220,7 @@ pub(crate) async fn rustdoc_redirector_handler( let params = params.with_page_kind(PageKind::Rustdoc); fn redirect_to_doc( - original_uri: Option<&Uri>, + original_uri: Option<&EscapedURI>, url: EscapedURI, cache_policy: CachePolicy, path_in_crate: Option<&str>,