From 89d5cab5039bfdd9e922c98c57bb672eff7188ce Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 23 Oct 2025 17:39:43 +0100 Subject: [PATCH] Block high page numbers unconditionally Remove conditional blocking based on user-agent and IP address blocklists. Now all requests exceeding `max_allowed_page_offset` are blocked when the endpoint uses `.limit_page_numbers()`, regardless of the requesting client's identity. This simplifies the pagination logic and removes the need for maintaining blocklists while still protecting against performance issues from deep pagination. --- src/config/server.rs | 85 +-------------------------- src/controllers/helpers/pagination.rs | 40 +------------ src/tests/pagination.rs | 4 +- src/tests/util/test_app.rs | 2 - 4 files changed, 4 insertions(+), 127 deletions(-) diff --git a/src/config/server.rs b/src/config/server.rs index 010dca68040..f91d48345a9 100644 --- a/src/config/server.rs +++ b/src/config/server.rs @@ -1,5 +1,3 @@ -use anyhow::{Context, anyhow}; -use ipnetwork::IpNetwork; use oauth2::{ClientId, ClientSecret}; use url::Url; @@ -53,8 +51,6 @@ pub struct Server { pub blocked_traffic: Vec<(String, Vec)>, pub blocked_ips: HashSet, pub max_allowed_page_offset: u32, - pub page_offset_ua_blocklist: Vec, - pub page_offset_cidr_blocklist: Vec, pub excluded_crate_names: Vec, pub domain_name: String, pub allowed_origins: AllowedOrigins, @@ -123,12 +119,6 @@ impl Server { /// querying metrics will be completely disabled. /// - `WEB_MAX_ALLOWED_PAGE_OFFSET`: Page offsets larger than this value are rejected. Defaults /// to 200. - /// - `WEB_PAGE_OFFSET_UA_BLOCKLIST`: A comma separated list of user-agent substrings that will - /// be blocked if `WEB_MAX_ALLOWED_PAGE_OFFSET` is exceeded. Including an empty string in the - /// list will block *all* user-agents exceeding the offset. If not set or empty, no blocking - /// will occur. - /// - `WEB_PAGE_OFFSET_CIDR_BLOCKLIST`: A comma separated list of CIDR blocks that will be used - /// to block IP addresses, e.g. `192.168.1.0/24`. If not set or empty, no blocking will occur. /// - `INSTANCE_METRICS_LOG_EVERY_SECONDS`: How frequently should instance metrics be logged. /// If the environment variable is not present instance metrics are not logged. /// - `FORCE_UNCONDITIONAL_REDIRECTS`: Whether to force unconditional redirects in the download @@ -156,9 +146,6 @@ impl Server { let blocked_ips = HashSet::from_iter(list_parsed("BLOCKED_IPS", IpAddr::from_str)?); let allowed_origins = AllowedOrigins::from_default_env()?; - let page_offset_ua_blocklist = list("WEB_PAGE_OFFSET_UA_BLOCKLIST")?; - let page_offset_cidr_blocklist = - list_parsed("WEB_PAGE_OFFSET_CIDR_BLOCKLIST", parse_cidr_block)?; let base = Base::from_environment()?; let excluded_crate_names = list("EXCLUDED_CRATE_NAMES")?; @@ -229,8 +216,6 @@ impl Server { blocked_traffic: blocked_traffic(), blocked_ips, max_allowed_page_offset: var_parsed("WEB_MAX_ALLOWED_PAGE_OFFSET")?.unwrap_or(200), - page_offset_ua_blocklist, - page_offset_cidr_blocklist, excluded_crate_names, domain_name, allowed_origins, @@ -268,34 +253,6 @@ impl Server { } } -/// Parses a CIDR block string to a valid `IpNetwork` struct. -/// -/// The purpose is to be able to block IP ranges that overload the API that uses pagination. -/// -/// The minimum number of bits for a host prefix must be -/// -/// * at least 16 for IPv4 based CIDRs. -/// * at least 64 for IPv6 based CIDRs -/// -fn parse_cidr_block(block: &str) -> anyhow::Result { - let cidr = block - .parse() - .context("WEB_PAGE_OFFSET_CIDR_BLOCKLIST must contain IPv4 or IPv6 CIDR blocks.")?; - - let host_prefix = match cidr { - IpNetwork::V4(_) => 16, - IpNetwork::V6(_) => 64, - }; - - if cidr.prefix() < host_prefix { - return Err(anyhow!( - "WEB_PAGE_OFFSET_CIDR_BLOCKLIST only allows CIDR blocks with a host prefix of at least 16 bits (IPv4) or 64 bits (IPv6)." - )); - } - - Ok(cidr) -} - fn blocked_traffic() -> Vec<(String, Vec)> { let pattern_list = dotenvy::var("BLOCKED_TRAFFIC").unwrap_or_default(); parse_traffic_patterns(&pattern_list) @@ -346,7 +303,7 @@ impl FromStr for AllowedOrigins { #[cfg(test)] mod tests { use super::*; - use claims::{assert_err, assert_none, assert_ok_eq}; + use claims::assert_none; #[test] fn parse_traffic_patterns_splits_on_comma_and_looks_for_equal_sign() { @@ -362,44 +319,4 @@ mod tests { assert_none!(parse_traffic_patterns(pattern_string_3).next()); } - - #[test] - fn parse_cidr_block_list_successfully() { - assert_ok_eq!( - parse_cidr_block("127.0.0.1/24"), - "127.0.0.1/24".parse::().unwrap() - ); - assert_ok_eq!( - parse_cidr_block("192.168.0.1/31"), - "192.168.0.1/31".parse::().unwrap() - ); - } - - #[test] - fn parse_cidr_blocks_panics_when_host_ipv4_prefix_is_too_low() { - assert_err!(parse_cidr_block("127.0.0.1/8")); - } - - #[test] - fn parse_cidr_blocks_panics_when_host_ipv6_prefix_is_too_low() { - assert_err!(parse_cidr_block( - "2001:0db8:0123:4567:89ab:cdef:1234:5678/56" - )); - } - - #[test] - fn parse_ipv6_based_cidr_blocks() { - assert_ok_eq!( - parse_cidr_block("2002::1234:abcd:ffff:c0a8:101/64"), - "2002::1234:abcd:ffff:c0a8:101/64" - .parse::() - .unwrap() - ); - assert_ok_eq!( - parse_cidr_block("2001:0db8:0123:4567:89ab:cdef:1234:5678/92"), - "2001:0db8:0123:4567:89ab:cdef:1234:5678/92" - .parse::() - .unwrap() - ); - } } diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index 1ace002b2b2..8ed21a4eec6 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -1,9 +1,6 @@ -use crate::config::Server; use crate::middleware::app::RequestApp; use crate::middleware::log_request::RequestLogExt; -use crate::middleware::real_ip::RealIp; use crate::models::helpers::with_count::*; -use crate::util::HeaderMapExt; use crate::util::errors::{AppResult, bad_request}; use std::num::NonZeroU32; @@ -16,7 +13,6 @@ use diesel::sql_types::BigInt; use diesel_async::AsyncPgConnection; use futures_util::future::BoxFuture; use futures_util::{FutureExt, TryStreamExt}; -use http::header; use http::request::Parts; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; @@ -125,12 +121,10 @@ impl PaginationOptionsBuilder { parts.request_log().add("bot", "suspected"); } - // Block large offsets for known violators of the crawler policy + // Block large offsets for performance reasons if self.limit_page_numbers { let config = &parts.app().config; - if numeric_page > config.max_allowed_page_offset - && is_useragent_or_ip_blocked(config, parts) - { + if numeric_page > config.max_allowed_page_offset { parts.request_log().add("cause", "large page offset"); let error = format!( @@ -329,36 +323,6 @@ impl RawSeekPayload { } } -/// Function to check if the request is blocked. -/// -/// A request can be blocked if either the User Agent is on the User Agent block list or if the client -/// IP is on the CIDR block list. -fn is_useragent_or_ip_blocked(config: &Server, req: &Parts) -> bool { - let user_agent = req.headers.get_str_or_default(header::USER_AGENT); - let client_ip = req.extensions.get::(); - - // check if user agent is blocked - if config - .page_offset_ua_blocklist - .iter() - .any(|blocked| user_agent.contains(blocked)) - { - return true; - } - - // check if client ip is blocked, needs to be an IPv4 address - if let Some(client_ip) = client_ip - && config - .page_offset_cidr_blocklist - .iter() - .any(|blocked| blocked.contains(**client_ip)) - { - return true; - } - - false -} - /// Encode a payload to be used as a seek key. /// /// The payload is base64-encoded to hint that it shouldn't be manually constructed. There is no diff --git a/src/tests/pagination.rs b/src/tests/pagination.rs index bb09b6757a1..4a65ae31a7b 100644 --- a/src/tests/pagination.rs +++ b/src/tests/pagination.rs @@ -1,14 +1,12 @@ use crate::builders::CrateBuilder; use crate::util::{RequestHelper, TestApp}; use insta::assert_snapshot; -use ipnetwork::IpNetwork; #[tokio::test(flavor = "multi_thread")] -async fn pagination_blocks_ip_from_cidr_block_list() { +async fn pagination_blocks_high_page_numbers() { let (app, anon, user) = TestApp::init() .with_config(|config| { config.max_allowed_page_offset = 1; - config.page_offset_cidr_blocklist = vec!["127.0.0.1/24".parse::().unwrap()]; }) .with_user() .await; diff --git a/src/tests/util/test_app.rs b/src/tests/util/test_app.rs index 5958661bfec..5b6fd666ae3 100644 --- a/src/tests/util/test_app.rs +++ b/src/tests/util/test_app.rs @@ -497,8 +497,6 @@ fn simple_config() -> config::Server { blocked_traffic: Default::default(), blocked_ips: Default::default(), max_allowed_page_offset: 200, - page_offset_ua_blocklist: vec![], - page_offset_cidr_blocklist: vec![], excluded_crate_names: vec![], domain_name: "crates.io".into(), allowed_origins: Default::default(),