Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 1 addition & 84 deletions src/config/server.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use anyhow::{Context, anyhow};
use ipnetwork::IpNetwork;
use oauth2::{ClientId, ClientSecret};
use url::Url;

Expand Down Expand Up @@ -53,8 +51,6 @@ pub struct Server {
pub blocked_traffic: Vec<(String, Vec<String>)>,
pub blocked_ips: HashSet<IpAddr>,
pub max_allowed_page_offset: u32,
pub page_offset_ua_blocklist: Vec<String>,
pub page_offset_cidr_blocklist: Vec<IpNetwork>,
pub excluded_crate_names: Vec<String>,
pub domain_name: String,
pub allowed_origins: AllowedOrigins,
Expand Down Expand Up @@ -123,12 +119,6 @@ impl Server {
/// querying metrics will be completely disabled.
/// - `WEB_MAX_ALLOWED_PAGE_OFFSET`: Page offsets larger than this value are rejected. Defaults
/// to 200.
/// - `WEB_PAGE_OFFSET_UA_BLOCKLIST`: A comma separated list of user-agent substrings that will
/// be blocked if `WEB_MAX_ALLOWED_PAGE_OFFSET` is exceeded. Including an empty string in the
/// list will block *all* user-agents exceeding the offset. If not set or empty, no blocking
/// will occur.
/// - `WEB_PAGE_OFFSET_CIDR_BLOCKLIST`: A comma separated list of CIDR blocks that will be used
/// to block IP addresses, e.g. `192.168.1.0/24`. If not set or empty, no blocking will occur.
/// - `INSTANCE_METRICS_LOG_EVERY_SECONDS`: How frequently should instance metrics be logged.
/// If the environment variable is not present instance metrics are not logged.
/// - `FORCE_UNCONDITIONAL_REDIRECTS`: Whether to force unconditional redirects in the download
Expand Down Expand Up @@ -156,9 +146,6 @@ impl Server {
let blocked_ips = HashSet::from_iter(list_parsed("BLOCKED_IPS", IpAddr::from_str)?);

let allowed_origins = AllowedOrigins::from_default_env()?;
let page_offset_ua_blocklist = list("WEB_PAGE_OFFSET_UA_BLOCKLIST")?;
let page_offset_cidr_blocklist =
list_parsed("WEB_PAGE_OFFSET_CIDR_BLOCKLIST", parse_cidr_block)?;

let base = Base::from_environment()?;
let excluded_crate_names = list("EXCLUDED_CRATE_NAMES")?;
Expand Down Expand Up @@ -229,8 +216,6 @@ impl Server {
blocked_traffic: blocked_traffic(),
blocked_ips,
max_allowed_page_offset: var_parsed("WEB_MAX_ALLOWED_PAGE_OFFSET")?.unwrap_or(200),
page_offset_ua_blocklist,
page_offset_cidr_blocklist,
excluded_crate_names,
domain_name,
allowed_origins,
Expand Down Expand Up @@ -268,34 +253,6 @@ impl Server {
}
}

/// Parses a CIDR block string to a valid `IpNetwork` struct.
///
/// The purpose is to be able to block IP ranges that overload the API that uses pagination.
///
/// The minimum number of bits for a host prefix must be
///
/// * at least 16 for IPv4 based CIDRs.
/// * at least 64 for IPv6 based CIDRs
///
fn parse_cidr_block(block: &str) -> anyhow::Result<IpNetwork> {
let cidr = block
.parse()
.context("WEB_PAGE_OFFSET_CIDR_BLOCKLIST must contain IPv4 or IPv6 CIDR blocks.")?;

let host_prefix = match cidr {
IpNetwork::V4(_) => 16,
IpNetwork::V6(_) => 64,
};

if cidr.prefix() < host_prefix {
return Err(anyhow!(
"WEB_PAGE_OFFSET_CIDR_BLOCKLIST only allows CIDR blocks with a host prefix of at least 16 bits (IPv4) or 64 bits (IPv6)."
));
}

Ok(cidr)
}

fn blocked_traffic() -> Vec<(String, Vec<String>)> {
let pattern_list = dotenvy::var("BLOCKED_TRAFFIC").unwrap_or_default();
parse_traffic_patterns(&pattern_list)
Expand Down Expand Up @@ -346,7 +303,7 @@ impl FromStr for AllowedOrigins {
#[cfg(test)]
mod tests {
use super::*;
use claims::{assert_err, assert_none, assert_ok_eq};
use claims::assert_none;

#[test]
fn parse_traffic_patterns_splits_on_comma_and_looks_for_equal_sign() {
Expand All @@ -362,44 +319,4 @@ mod tests {

assert_none!(parse_traffic_patterns(pattern_string_3).next());
}

#[test]
fn parse_cidr_block_list_successfully() {
assert_ok_eq!(
parse_cidr_block("127.0.0.1/24"),
"127.0.0.1/24".parse::<IpNetwork>().unwrap()
);
assert_ok_eq!(
parse_cidr_block("192.168.0.1/31"),
"192.168.0.1/31".parse::<IpNetwork>().unwrap()
);
}

#[test]
fn parse_cidr_blocks_panics_when_host_ipv4_prefix_is_too_low() {
assert_err!(parse_cidr_block("127.0.0.1/8"));
}

#[test]
fn parse_cidr_blocks_panics_when_host_ipv6_prefix_is_too_low() {
assert_err!(parse_cidr_block(
"2001:0db8:0123:4567:89ab:cdef:1234:5678/56"
));
}

#[test]
fn parse_ipv6_based_cidr_blocks() {
assert_ok_eq!(
parse_cidr_block("2002::1234:abcd:ffff:c0a8:101/64"),
"2002::1234:abcd:ffff:c0a8:101/64"
.parse::<IpNetwork>()
.unwrap()
);
assert_ok_eq!(
parse_cidr_block("2001:0db8:0123:4567:89ab:cdef:1234:5678/92"),
"2001:0db8:0123:4567:89ab:cdef:1234:5678/92"
.parse::<IpNetwork>()
.unwrap()
);
}
}
40 changes: 2 additions & 38 deletions src/controllers/helpers/pagination.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
use crate::config::Server;
use crate::middleware::app::RequestApp;
use crate::middleware::log_request::RequestLogExt;
use crate::middleware::real_ip::RealIp;
use crate::models::helpers::with_count::*;
use crate::util::HeaderMapExt;
use crate::util::errors::{AppResult, bad_request};
use std::num::NonZeroU32;

Expand All @@ -16,7 +13,6 @@ use diesel::sql_types::BigInt;
use diesel_async::AsyncPgConnection;
use futures_util::future::BoxFuture;
use futures_util::{FutureExt, TryStreamExt};
use http::header;
use http::request::Parts;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -125,12 +121,10 @@ impl PaginationOptionsBuilder {
parts.request_log().add("bot", "suspected");
}

// Block large offsets for known violators of the crawler policy
// Block large offsets for performance reasons
if self.limit_page_numbers {
let config = &parts.app().config;
if numeric_page > config.max_allowed_page_offset
&& is_useragent_or_ip_blocked(config, parts)
{
if numeric_page > config.max_allowed_page_offset {
parts.request_log().add("cause", "large page offset");

let error = format!(
Expand Down Expand Up @@ -329,36 +323,6 @@ impl RawSeekPayload {
}
}

/// Function to check if the request is blocked.
///
/// A request can be blocked if either the User Agent is on the User Agent block list or if the client
/// IP is on the CIDR block list.
fn is_useragent_or_ip_blocked(config: &Server, req: &Parts) -> bool {
let user_agent = req.headers.get_str_or_default(header::USER_AGENT);
let client_ip = req.extensions.get::<RealIp>();

// check if user agent is blocked
if config
.page_offset_ua_blocklist
.iter()
.any(|blocked| user_agent.contains(blocked))
{
return true;
}

// check if client ip is blocked, needs to be an IPv4 address
if let Some(client_ip) = client_ip
&& config
.page_offset_cidr_blocklist
.iter()
.any(|blocked| blocked.contains(**client_ip))
{
return true;
}

false
}

/// Encode a payload to be used as a seek key.
///
/// The payload is base64-encoded to hint that it shouldn't be manually constructed. There is no
Expand Down
4 changes: 1 addition & 3 deletions src/tests/pagination.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use crate::builders::CrateBuilder;
use crate::util::{RequestHelper, TestApp};
use insta::assert_snapshot;
use ipnetwork::IpNetwork;

#[tokio::test(flavor = "multi_thread")]
async fn pagination_blocks_ip_from_cidr_block_list() {
async fn pagination_blocks_high_page_numbers() {
let (app, anon, user) = TestApp::init()
.with_config(|config| {
config.max_allowed_page_offset = 1;
config.page_offset_cidr_blocklist = vec!["127.0.0.1/24".parse::<IpNetwork>().unwrap()];
})
.with_user()
.await;
Expand Down
2 changes: 0 additions & 2 deletions src/tests/util/test_app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,6 @@ fn simple_config() -> config::Server {
blocked_traffic: Default::default(),
blocked_ips: Default::default(),
max_allowed_page_offset: 200,
page_offset_ua_blocklist: vec![],
page_offset_cidr_blocklist: vec![],
excluded_crate_names: vec![],
domain_name: "crates.io".into(),
allowed_origins: Default::default(),
Expand Down