diff --git a/src/app.rs b/src/app.rs index 9a15e0f1f44..3955363c559 100644 --- a/src/app.rs +++ b/src/app.rs @@ -164,13 +164,13 @@ impl App { read_only_replica_database: replica_database, github, github_oauth, - config, version_id_cacher, downloads_counter: DownloadsCounter::new(), - emails: Emails::from_environment(), + emails: Emails::from_environment(&config), service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"), instance_metrics, http_client, + config, } } diff --git a/src/config/base.rs b/src/config/base.rs index 6952787f60c..e839b627f10 100644 --- a/src/config/base.rs +++ b/src/config/base.rs @@ -12,7 +12,7 @@ use crate::{env, uploaders::Uploader, Env, Replica}; pub struct Base { - pub(super) env: Env, + pub env: Env, uploader: Uploader, } diff --git a/src/controllers.rs b/src/controllers.rs index 169ff5e0175..316f4ba19cc 100644 --- a/src/controllers.rs +++ b/src/controllers.rs @@ -8,8 +8,6 @@ mod frontend_prelude { pub use crate::util::errors::{bad_request, server_error}; } -pub(crate) use prelude::RequestUtils; - mod prelude { pub use super::helpers::ok_true; pub use diesel::prelude::*; @@ -36,8 +34,6 @@ mod prelude { fn query(&self) -> IndexMap; fn wants_json(&self) -> bool; fn query_with_params(&self, params: IndexMap) -> String; - - fn log_metadata(&mut self, key: &'static str, value: V); } impl<'a> RequestUtils for dyn RequestExt + 'a { @@ -74,10 +70,6 @@ mod prelude { .finish(); format!("?{query_string}") } - - fn log_metadata(&mut self, key: &'static str, value: V) { - crate::middleware::log_request::add_custom_metadata(self, key, value); - } } } diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index 573222789c4..2f1aa1dae25 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -92,7 +92,7 @@ impl PaginationOptionsBuilder { } if numeric_page > MAX_PAGE_BEFORE_SUSPECTED_BOT { - req.log_metadata("bot", "suspected"); + add_custom_metadata("bot", "suspected"); } // Block large offsets for known violators of the crawler policy @@ -105,7 +105,7 @@ impl PaginationOptionsBuilder { .iter() .any(|blocked| user_agent.contains(blocked)) { - add_custom_metadata(req, "cause", "large page offset"); + add_custom_metadata("cause", "large page offset"); return Err(bad_request("requested page offset is too large")); } } diff --git a/src/controllers/krate/publish.rs b/src/controllers/krate/publish.rs index 361d808bd17..70f0c3ba234 100644 --- a/src/controllers/krate/publish.rs +++ b/src/controllers/krate/publish.rs @@ -16,6 +16,7 @@ use crate::models::{ }; use crate::worker; +use crate::middleware::log_request::add_custom_metadata; use crate::schema::*; use crate::util::errors::{cargo_err, AppResult}; use crate::util::{read_fill, read_le_u32, CargoVcsInfo, LimitErrorReader, Maximums}; @@ -59,8 +60,8 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult { let new_crate = parse_new_headers(req)?; - req.log_metadata("crate_name", new_crate.name.to_string()); - req.log_metadata("crate_version", new_crate.vers.to_string()); + add_custom_metadata("crate_name", new_crate.name.to_string()); + add_custom_metadata("crate_version", new_crate.vers.to_string()); let conn = app.primary_database.get()?; let ids = req.authenticate()?; @@ -265,7 +266,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult { fn parse_new_headers(req: &mut dyn RequestExt) -> AppResult { // Read the json upload request let metadata_length = u64::from(read_le_u32(req.body())?); - req.log_metadata("metadata_length", metadata_length); + add_custom_metadata("metadata_length", metadata_length); let max = req.app().config.max_upload_size; if metadata_length > max { diff --git a/src/controllers/util.rs b/src/controllers/util.rs index aa824d8148f..b5588ece587 100644 --- a/src/controllers/util.rs +++ b/src/controllers/util.rs @@ -129,9 +129,9 @@ impl<'a> UserAuthenticationExt for dyn RequestExt + 'a { } } - log_request::add_custom_metadata(self, "uid", authenticated_user.user_id()); + log_request::add_custom_metadata("uid", authenticated_user.user_id()); if let Some(id) = authenticated_user.api_token_id() { - log_request::add_custom_metadata(self, "tokenid", id); + log_request::add_custom_metadata("tokenid", id); } Ok(authenticated_user) diff --git a/src/controllers/version/downloads.rs b/src/controllers/version/downloads.rs index 997baabccc1..19a60ad3bb4 100644 --- a/src/controllers/version/downloads.rs +++ b/src/controllers/version/downloads.rs @@ -5,6 +5,7 @@ use super::{extract_crate_name_and_semver, version_and_crate}; use crate::controllers::prelude::*; use crate::db::PoolError; +use crate::middleware::log_request::add_custom_metadata; use crate::models::{Crate, VersionDownload}; use crate::schema::*; use crate::views::EncodableVersionDownload; @@ -108,7 +109,7 @@ pub fn download(req: &mut dyn RequestExt) -> EndpointResult { .crate_location(&crate_name, &*version); if let Some((key, value)) = log_metadata { - req.log_metadata(key, value); + add_custom_metadata(key, value); } if req.wants_json() { diff --git a/src/email.rs b/src/email.rs index 7d9b913d669..8d995c82af7 100644 --- a/src/email.rs +++ b/src/email.rs @@ -3,10 +3,14 @@ use std::sync::Mutex; use crate::util::errors::{server_error, AppResult}; +use crate::config; +use crate::middleware::log_request::add_custom_metadata; +use crate::Env; use lettre::transport::file::FileTransport; use lettre::transport::smtp::authentication::{Credentials, Mechanism}; use lettre::transport::smtp::SmtpTransport; use lettre::{Message, Transport}; +use rand::distributions::{Alphanumeric, DistString}; #[derive(Debug)] pub struct Emails { @@ -16,7 +20,7 @@ pub struct Emails { impl Emails { /// Create a new instance detecting the backend from the environment. This will either connect /// to a SMTP server or store the emails on the local filesystem. - pub fn from_environment() -> Self { + pub fn from_environment(config: &config::Server) -> Self { let backend = match ( dotenv::var("MAILGUN_SMTP_LOGIN"), dotenv::var("MAILGUN_SMTP_PASSWORD"), @@ -32,6 +36,10 @@ impl Emails { }, }; + if config.base.env == Env::Production && !matches!(backend, EmailBackend::Smtp { .. }) { + panic!("only the smtp backend is allowed in production"); + } + Self { backend } } @@ -94,7 +102,21 @@ or go to https://{domain}/me/pending-invites to manage all of your crate ownersh } fn send(&self, recipient: &str, subject: &str, body: &str) -> AppResult<()> { + // The message ID is normally generated by the SMTP server, but if we let it generate the + // ID there will be no way for the crates.io application to know the ID of the message it + // just sent, as it's not included in the SMTP response. + // + // Our support staff needs to know the message ID to be able to find misdelivered emails. + // Because of that we're generating a random message ID, hoping the SMTP server doesn't + // replace it when it relays the message. + let message_id = format!( + "<{}@{}>", + Alphanumeric.sample_string(&mut rand::thread_rng(), 32), + crate::config::domain_name(), + ); + let email = Message::builder() + .message_id(Some(message_id.clone())) .to(recipient.parse()?) .from(self.sender_address().parse()?) .subject(subject) @@ -106,23 +128,42 @@ or go to https://{domain}/me/pending-invites to manage all of your crate ownersh login, password, } => { - SmtpTransport::relay(server)? - .credentials(Credentials::new(login.clone(), password.clone())) - .authentication(vec![Mechanism::Plain]) - .build() - .send(&email) - .map_err(|_| server_error("Error in sending email"))?; + add_custom_metadata("email_backend", "smtp"); + + SmtpTransport::relay(server) + .and_then(|transport| { + transport + .credentials(Credentials::new(login.clone(), password.clone())) + .authentication(vec![Mechanism::Plain]) + .build() + .send(&email) + }) + .map_err(|e| { + add_custom_metadata("email_error", e); + server_error("Failed to send the email") + })?; + + add_custom_metadata("email_id", message_id); } EmailBackend::FileSystem { path } => { - FileTransport::new(&path) - .send(&email) - .map_err(|_| server_error("Email file could not be generated"))?; + add_custom_metadata("email_backend", "fs"); + + let id = FileTransport::new(&path).send(&email).map_err(|err| { + add_custom_metadata("email_error", err); + server_error("Email file could not be generated") + })?; + + add_custom_metadata("email_path", path.join(format!("{id}.eml")).display()); + } + EmailBackend::Memory { mails } => { + add_custom_metadata("email_backend", "memory"); + + mails.lock().unwrap().push(StoredEmail { + to: recipient.into(), + subject: subject.into(), + body: body.into(), + }); } - EmailBackend::Memory { mails } => mails.lock().unwrap().push(StoredEmail { - to: recipient.into(), - subject: subject.into(), - body: body.into(), - }), } Ok(()) diff --git a/src/middleware/balance_capacity.rs b/src/middleware/balance_capacity.rs index 1b66aec8567..cf053ff2094 100644 --- a/src/middleware/balance_capacity.rs +++ b/src/middleware/balance_capacity.rs @@ -56,11 +56,11 @@ impl BalanceCapacity { fn handle_high_load(&self, request: &mut dyn RequestExt, note: &str) -> AfterResult { if self.report_only { // In report-only mode we serve all requests but add log metadata - add_custom_metadata(request, "would_reject", note); + add_custom_metadata("would_reject", note); self.handle(request) } else { // Reject the request - add_custom_metadata(request, "cause", note); + add_custom_metadata("cause", note); let body = "Service temporarily unavailable"; Response::builder() .status(StatusCode::SERVICE_UNAVAILABLE) @@ -84,7 +84,7 @@ impl Handler for BalanceCapacity { // Begin logging total request count so early stages of load increase can be located if in_flight_total >= self.log_total_at_count { - add_custom_metadata(request, "in_flight_total", in_flight_total); + add_custom_metadata("in_flight_total", in_flight_total); } // Download requests are always accepted and do not affect the capacity tracking @@ -98,7 +98,7 @@ impl Handler for BalanceCapacity { // Begin logging non-download request count so early stages of non-download load increase can be located if load >= self.log_at_percentage { - add_custom_metadata(request, "in_flight_non_dl_requests", count); + add_custom_metadata("in_flight_non_dl_requests", count); } // Reject read-only requests as load nears capacity. Bots are likely to send only safe diff --git a/src/middleware/block_traffic.rs b/src/middleware/block_traffic.rs index f145d3a422b..8ed570e5165 100644 --- a/src/middleware/block_traffic.rs +++ b/src/middleware/block_traffic.rs @@ -48,7 +48,7 @@ impl Handler for BlockTraffic { .any(|value| self.blocked_values.iter().any(|v| v == value)); if has_blocked_value { let cause = format!("blocked due to contents of header {}", self.header_name); - add_custom_metadata(req, "cause", cause); + add_custom_metadata("cause", cause); let body = format!( "We are unable to process your request at this time. \ This usually means that you are in violation of our crawler \ diff --git a/src/middleware/log_request.rs b/src/middleware/log_request.rs index 3c25c95e011..2e710dc4eae 100644 --- a/src/middleware/log_request.rs +++ b/src/middleware/log_request.rs @@ -8,14 +8,29 @@ use conduit::{header, RequestExt, StatusCode}; use crate::middleware::normalize_path::OriginalPath; use crate::middleware::response_timing::ResponseTime; +use std::cell::RefCell; use std::fmt::{self, Display, Formatter}; const SLOW_REQUEST_THRESHOLD_MS: u64 = 1000; +// A thread local is used instead of a request extension to avoid the need to pass the request +// object everywhere in the codebase. When migrating to async this will need to be moved to an +// async-equivalent, as thread locals misbehave in async contexes. +thread_local! { + static CUSTOM_METADATA: RefCell> = RefCell::new(Vec::new()); +} + #[derive(Default)] pub(super) struct LogRequests(); impl Middleware for LogRequests { + fn before(&self, _: &mut dyn RequestExt) -> BeforeResult { + // Remove any metadata set by the previous task before processing any new request. + CUSTOM_METADATA.with(|metadata| metadata.borrow_mut().clear()); + + Ok(()) + } + fn after(&self, req: &mut dyn RequestExt, res: AfterResult) -> AfterResult { println!("{}", RequestLine { req, res: &res }); @@ -23,33 +38,21 @@ impl Middleware for LogRequests { } } -struct CustomMetadata { - entries: Vec<(&'static str, String)>, -} - -pub fn add_custom_metadata(req: &mut dyn RequestExt, key: &'static str, value: V) { - if let Some(metadata) = req.mut_extensions().get_mut::() { - metadata.entries.push((key, value.to_string())); - } else { - let mut metadata = CustomMetadata { - entries: Vec::new(), - }; - metadata.entries.push((key, value.to_string())); - req.mut_extensions().insert(metadata); - } - +pub fn add_custom_metadata(key: &'static str, value: V) { + CUSTOM_METADATA.with(|metadata| metadata.borrow_mut().push((key, value.to_string()))); sentry::configure_scope(|scope| scope.set_extra(key, value.to_string().into())); } #[cfg(test)] -pub(crate) fn get_log_message(req: &dyn RequestExt, key: &'static str) -> String { - // Unwrap shouldn't panic as no other code has access to the private struct to remove it - for (k, v) in &req.extensions().get::().unwrap().entries { - if key == *k { - return v.clone(); +pub(crate) fn get_log_message(key: &'static str) -> String { + CUSTOM_METADATA.with(|metadata| { + for (k, v) in &*metadata.borrow() { + if key == *k { + return v.clone(); + } } - } - panic!("expected log message for {} not found", key); + panic!("expected log message for {} not found", key); + }) } struct RequestLine<'r> { @@ -95,11 +98,12 @@ impl Display for RequestLine<'_> { line.add_field("status", status.as_str())?; line.add_quoted_field("user_agent", request_header(self.req, header::USER_AGENT))?; - if let Some(metadata) = self.req.extensions().get::() { - for (key, value) in &metadata.entries { + CUSTOM_METADATA.with(|metadata| { + for (key, value) in &*metadata.borrow() { line.add_quoted_field(key, value)?; } - } + fmt::Result::Ok(()) + })?; if let Err(err) = self.res { line.add_quoted_field("error", err)?; diff --git a/src/middleware/normalize_path.rs b/src/middleware/normalize_path.rs index 2ac260d1a56..e27b4851772 100644 --- a/src/middleware/normalize_path.rs +++ b/src/middleware/normalize_path.rs @@ -43,7 +43,7 @@ impl Middleware for NormalizePath { .to_string_lossy() .to_string(); // non-Unicode is replaced with U+FFFD REPLACEMENT CHARACTER - add_custom_metadata(req, "normalized_path", path.clone()); + add_custom_metadata("normalized_path", path.clone()); *req.path_mut() = path; req.mut_extensions().insert(original_path); diff --git a/src/middleware/require_user_agent.rs b/src/middleware/require_user_agent.rs index ebfc7a749a3..05dfb4bbd5f 100644 --- a/src/middleware/require_user_agent.rs +++ b/src/middleware/require_user_agent.rs @@ -32,7 +32,7 @@ impl Handler for RequireUserAgent { let has_user_agent = !agent.is_empty() && agent != self.cdn_user_agent; let is_download = req.path().ends_with("download"); if !has_user_agent && !is_download { - add_custom_metadata(req, "cause", "no user agent"); + add_custom_metadata("cause", "no user agent"); let body = format!( include_str!("no_user_agent_message.txt"), request_header(req, "x-request-id"), diff --git a/src/router.rs b/src/router.rs index 737d6cb9320..68f44e05892 100644 --- a/src/router.rs +++ b/src/router.rs @@ -5,6 +5,7 @@ use conduit_router::{RequestParams, RouteBuilder, RoutePattern}; use crate::controllers::*; use crate::middleware::app::RequestApp; +use crate::middleware::log_request::add_custom_metadata; use crate::util::errors::{std_error, AppError, RouteBlocked}; use crate::util::EndpointResult; use crate::{App, Env}; @@ -171,7 +172,7 @@ impl Handler for C { Ok(resp) => Ok(resp), Err(e) => { if let Some(cause) = e.cause() { - req.log_metadata("cause", cause.to_string()) + add_custom_metadata("cause", cause.to_string()) }; match e.response() { Some(response) => Ok(response), @@ -249,7 +250,7 @@ mod tests { .unwrap(); assert_eq!(response.status(), StatusCode::BAD_REQUEST); assert_eq!( - crate::middleware::log_request::get_log_message(&req, "cause"), + crate::middleware::log_request::get_log_message("cause"), "middle error caused by invalid digit found in string" );