From d1ecef40b86415ce544917d984ba540d69521cf4 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Tue, 8 Mar 2022 11:08:26 +0100 Subject: [PATCH 1/3] use a thread local to store custom metadata Before this commit, custom metadata was stored as part of a request extension, which required the mutable request object to be passed to every place which needed to log something. This commit changes the implementation to use a thread local that is cleared before a request is processed. This removes the dependency on the mutable request object. Note that when migrating the crates.io codebase to async the thread local will need to be replaced with an async equivalent. --- src/controllers.rs | 8 ---- src/controllers/helpers/pagination.rs | 4 +- src/controllers/krate/publish.rs | 7 ++-- src/controllers/util.rs | 4 +- src/controllers/version/downloads.rs | 3 +- src/middleware/balance_capacity.rs | 8 ++-- src/middleware/block_traffic.rs | 2 +- src/middleware/log_request.rs | 54 ++++++++++++++------------- src/middleware/normalize_path.rs | 2 +- src/middleware/require_user_agent.rs | 2 +- src/router.rs | 5 ++- 11 files changed, 49 insertions(+), 50 deletions(-) diff --git a/src/controllers.rs b/src/controllers.rs index 169ff5e0175..316f4ba19cc 100644 --- a/src/controllers.rs +++ b/src/controllers.rs @@ -8,8 +8,6 @@ mod frontend_prelude { pub use crate::util::errors::{bad_request, server_error}; } -pub(crate) use prelude::RequestUtils; - mod prelude { pub use super::helpers::ok_true; pub use diesel::prelude::*; @@ -36,8 +34,6 @@ mod prelude { fn query(&self) -> IndexMap; fn wants_json(&self) -> bool; fn query_with_params(&self, params: IndexMap) -> String; - - fn log_metadata(&mut self, key: &'static str, value: V); } impl<'a> RequestUtils for dyn RequestExt + 'a { @@ -74,10 +70,6 @@ mod prelude { .finish(); format!("?{query_string}") } - - fn log_metadata(&mut self, key: &'static str, value: V) { - crate::middleware::log_request::add_custom_metadata(self, key, value); - } } } diff --git a/src/controllers/helpers/pagination.rs b/src/controllers/helpers/pagination.rs index 573222789c4..2f1aa1dae25 100644 --- a/src/controllers/helpers/pagination.rs +++ b/src/controllers/helpers/pagination.rs @@ -92,7 +92,7 @@ impl PaginationOptionsBuilder { } if numeric_page > MAX_PAGE_BEFORE_SUSPECTED_BOT { - req.log_metadata("bot", "suspected"); + add_custom_metadata("bot", "suspected"); } // Block large offsets for known violators of the crawler policy @@ -105,7 +105,7 @@ impl PaginationOptionsBuilder { .iter() .any(|blocked| user_agent.contains(blocked)) { - add_custom_metadata(req, "cause", "large page offset"); + add_custom_metadata("cause", "large page offset"); return Err(bad_request("requested page offset is too large")); } } diff --git a/src/controllers/krate/publish.rs b/src/controllers/krate/publish.rs index 361d808bd17..70f0c3ba234 100644 --- a/src/controllers/krate/publish.rs +++ b/src/controllers/krate/publish.rs @@ -16,6 +16,7 @@ use crate::models::{ }; use crate::worker; +use crate::middleware::log_request::add_custom_metadata; use crate::schema::*; use crate::util::errors::{cargo_err, AppResult}; use crate::util::{read_fill, read_le_u32, CargoVcsInfo, LimitErrorReader, Maximums}; @@ -59,8 +60,8 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult { let new_crate = parse_new_headers(req)?; - req.log_metadata("crate_name", new_crate.name.to_string()); - req.log_metadata("crate_version", new_crate.vers.to_string()); + add_custom_metadata("crate_name", new_crate.name.to_string()); + add_custom_metadata("crate_version", new_crate.vers.to_string()); let conn = app.primary_database.get()?; let ids = req.authenticate()?; @@ -265,7 +266,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult { fn parse_new_headers(req: &mut dyn RequestExt) -> AppResult { // Read the json upload request let metadata_length = u64::from(read_le_u32(req.body())?); - req.log_metadata("metadata_length", metadata_length); + add_custom_metadata("metadata_length", metadata_length); let max = req.app().config.max_upload_size; if metadata_length > max { diff --git a/src/controllers/util.rs b/src/controllers/util.rs index aa824d8148f..b5588ece587 100644 --- a/src/controllers/util.rs +++ b/src/controllers/util.rs @@ -129,9 +129,9 @@ impl<'a> UserAuthenticationExt for dyn RequestExt + 'a { } } - log_request::add_custom_metadata(self, "uid", authenticated_user.user_id()); + log_request::add_custom_metadata("uid", authenticated_user.user_id()); if let Some(id) = authenticated_user.api_token_id() { - log_request::add_custom_metadata(self, "tokenid", id); + log_request::add_custom_metadata("tokenid", id); } Ok(authenticated_user) diff --git a/src/controllers/version/downloads.rs b/src/controllers/version/downloads.rs index 997baabccc1..19a60ad3bb4 100644 --- a/src/controllers/version/downloads.rs +++ b/src/controllers/version/downloads.rs @@ -5,6 +5,7 @@ use super::{extract_crate_name_and_semver, version_and_crate}; use crate::controllers::prelude::*; use crate::db::PoolError; +use crate::middleware::log_request::add_custom_metadata; use crate::models::{Crate, VersionDownload}; use crate::schema::*; use crate::views::EncodableVersionDownload; @@ -108,7 +109,7 @@ pub fn download(req: &mut dyn RequestExt) -> EndpointResult { .crate_location(&crate_name, &*version); if let Some((key, value)) = log_metadata { - req.log_metadata(key, value); + add_custom_metadata(key, value); } if req.wants_json() { diff --git a/src/middleware/balance_capacity.rs b/src/middleware/balance_capacity.rs index 1b66aec8567..cf053ff2094 100644 --- a/src/middleware/balance_capacity.rs +++ b/src/middleware/balance_capacity.rs @@ -56,11 +56,11 @@ impl BalanceCapacity { fn handle_high_load(&self, request: &mut dyn RequestExt, note: &str) -> AfterResult { if self.report_only { // In report-only mode we serve all requests but add log metadata - add_custom_metadata(request, "would_reject", note); + add_custom_metadata("would_reject", note); self.handle(request) } else { // Reject the request - add_custom_metadata(request, "cause", note); + add_custom_metadata("cause", note); let body = "Service temporarily unavailable"; Response::builder() .status(StatusCode::SERVICE_UNAVAILABLE) @@ -84,7 +84,7 @@ impl Handler for BalanceCapacity { // Begin logging total request count so early stages of load increase can be located if in_flight_total >= self.log_total_at_count { - add_custom_metadata(request, "in_flight_total", in_flight_total); + add_custom_metadata("in_flight_total", in_flight_total); } // Download requests are always accepted and do not affect the capacity tracking @@ -98,7 +98,7 @@ impl Handler for BalanceCapacity { // Begin logging non-download request count so early stages of non-download load increase can be located if load >= self.log_at_percentage { - add_custom_metadata(request, "in_flight_non_dl_requests", count); + add_custom_metadata("in_flight_non_dl_requests", count); } // Reject read-only requests as load nears capacity. Bots are likely to send only safe diff --git a/src/middleware/block_traffic.rs b/src/middleware/block_traffic.rs index f145d3a422b..8ed570e5165 100644 --- a/src/middleware/block_traffic.rs +++ b/src/middleware/block_traffic.rs @@ -48,7 +48,7 @@ impl Handler for BlockTraffic { .any(|value| self.blocked_values.iter().any(|v| v == value)); if has_blocked_value { let cause = format!("blocked due to contents of header {}", self.header_name); - add_custom_metadata(req, "cause", cause); + add_custom_metadata("cause", cause); let body = format!( "We are unable to process your request at this time. \ This usually means that you are in violation of our crawler \ diff --git a/src/middleware/log_request.rs b/src/middleware/log_request.rs index 3c25c95e011..2e710dc4eae 100644 --- a/src/middleware/log_request.rs +++ b/src/middleware/log_request.rs @@ -8,14 +8,29 @@ use conduit::{header, RequestExt, StatusCode}; use crate::middleware::normalize_path::OriginalPath; use crate::middleware::response_timing::ResponseTime; +use std::cell::RefCell; use std::fmt::{self, Display, Formatter}; const SLOW_REQUEST_THRESHOLD_MS: u64 = 1000; +// A thread local is used instead of a request extension to avoid the need to pass the request +// object everywhere in the codebase. When migrating to async this will need to be moved to an +// async-equivalent, as thread locals misbehave in async contexes. +thread_local! { + static CUSTOM_METADATA: RefCell> = RefCell::new(Vec::new()); +} + #[derive(Default)] pub(super) struct LogRequests(); impl Middleware for LogRequests { + fn before(&self, _: &mut dyn RequestExt) -> BeforeResult { + // Remove any metadata set by the previous task before processing any new request. + CUSTOM_METADATA.with(|metadata| metadata.borrow_mut().clear()); + + Ok(()) + } + fn after(&self, req: &mut dyn RequestExt, res: AfterResult) -> AfterResult { println!("{}", RequestLine { req, res: &res }); @@ -23,33 +38,21 @@ impl Middleware for LogRequests { } } -struct CustomMetadata { - entries: Vec<(&'static str, String)>, -} - -pub fn add_custom_metadata(req: &mut dyn RequestExt, key: &'static str, value: V) { - if let Some(metadata) = req.mut_extensions().get_mut::() { - metadata.entries.push((key, value.to_string())); - } else { - let mut metadata = CustomMetadata { - entries: Vec::new(), - }; - metadata.entries.push((key, value.to_string())); - req.mut_extensions().insert(metadata); - } - +pub fn add_custom_metadata(key: &'static str, value: V) { + CUSTOM_METADATA.with(|metadata| metadata.borrow_mut().push((key, value.to_string()))); sentry::configure_scope(|scope| scope.set_extra(key, value.to_string().into())); } #[cfg(test)] -pub(crate) fn get_log_message(req: &dyn RequestExt, key: &'static str) -> String { - // Unwrap shouldn't panic as no other code has access to the private struct to remove it - for (k, v) in &req.extensions().get::().unwrap().entries { - if key == *k { - return v.clone(); +pub(crate) fn get_log_message(key: &'static str) -> String { + CUSTOM_METADATA.with(|metadata| { + for (k, v) in &*metadata.borrow() { + if key == *k { + return v.clone(); + } } - } - panic!("expected log message for {} not found", key); + panic!("expected log message for {} not found", key); + }) } struct RequestLine<'r> { @@ -95,11 +98,12 @@ impl Display for RequestLine<'_> { line.add_field("status", status.as_str())?; line.add_quoted_field("user_agent", request_header(self.req, header::USER_AGENT))?; - if let Some(metadata) = self.req.extensions().get::() { - for (key, value) in &metadata.entries { + CUSTOM_METADATA.with(|metadata| { + for (key, value) in &*metadata.borrow() { line.add_quoted_field(key, value)?; } - } + fmt::Result::Ok(()) + })?; if let Err(err) = self.res { line.add_quoted_field("error", err)?; diff --git a/src/middleware/normalize_path.rs b/src/middleware/normalize_path.rs index 2ac260d1a56..e27b4851772 100644 --- a/src/middleware/normalize_path.rs +++ b/src/middleware/normalize_path.rs @@ -43,7 +43,7 @@ impl Middleware for NormalizePath { .to_string_lossy() .to_string(); // non-Unicode is replaced with U+FFFD REPLACEMENT CHARACTER - add_custom_metadata(req, "normalized_path", path.clone()); + add_custom_metadata("normalized_path", path.clone()); *req.path_mut() = path; req.mut_extensions().insert(original_path); diff --git a/src/middleware/require_user_agent.rs b/src/middleware/require_user_agent.rs index ebfc7a749a3..05dfb4bbd5f 100644 --- a/src/middleware/require_user_agent.rs +++ b/src/middleware/require_user_agent.rs @@ -32,7 +32,7 @@ impl Handler for RequireUserAgent { let has_user_agent = !agent.is_empty() && agent != self.cdn_user_agent; let is_download = req.path().ends_with("download"); if !has_user_agent && !is_download { - add_custom_metadata(req, "cause", "no user agent"); + add_custom_metadata("cause", "no user agent"); let body = format!( include_str!("no_user_agent_message.txt"), request_header(req, "x-request-id"), diff --git a/src/router.rs b/src/router.rs index 737d6cb9320..68f44e05892 100644 --- a/src/router.rs +++ b/src/router.rs @@ -5,6 +5,7 @@ use conduit_router::{RequestParams, RouteBuilder, RoutePattern}; use crate::controllers::*; use crate::middleware::app::RequestApp; +use crate::middleware::log_request::add_custom_metadata; use crate::util::errors::{std_error, AppError, RouteBlocked}; use crate::util::EndpointResult; use crate::{App, Env}; @@ -171,7 +172,7 @@ impl Handler for C { Ok(resp) => Ok(resp), Err(e) => { if let Some(cause) = e.cause() { - req.log_metadata("cause", cause.to_string()) + add_custom_metadata("cause", cause.to_string()) }; match e.response() { Some(response) => Ok(response), @@ -249,7 +250,7 @@ mod tests { .unwrap(); assert_eq!(response.status(), StatusCode::BAD_REQUEST); assert_eq!( - crate::middleware::log_request::get_log_message(&req, "cause"), + crate::middleware::log_request::get_log_message("cause"), "middle error caused by invalid digit found in string" ); From e59cfcdea0ff14529e2d483d525691d6139e7ccf Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Tue, 8 Mar 2022 12:00:09 +0100 Subject: [PATCH 2/3] log information about sent emails --- src/email.rs | 63 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/src/email.rs b/src/email.rs index 7d9b913d669..ad0cbd1b670 100644 --- a/src/email.rs +++ b/src/email.rs @@ -3,10 +3,12 @@ use std::sync::Mutex; use crate::util::errors::{server_error, AppResult}; +use crate::middleware::log_request::add_custom_metadata; use lettre::transport::file::FileTransport; use lettre::transport::smtp::authentication::{Credentials, Mechanism}; use lettre::transport::smtp::SmtpTransport; use lettre::{Message, Transport}; +use rand::distributions::{Alphanumeric, DistString}; #[derive(Debug)] pub struct Emails { @@ -94,7 +96,21 @@ or go to https://{domain}/me/pending-invites to manage all of your crate ownersh } fn send(&self, recipient: &str, subject: &str, body: &str) -> AppResult<()> { + // The message ID is normally generated by the SMTP server, but if we let it generate the + // ID there will be no way for the crates.io application to know the ID of the message it + // just sent, as it's not included in the SMTP response. + // + // Our support staff needs to know the message ID to be able to find misdelivered emails. + // Because of that we're generating a random message ID, hoping the SMTP server doesn't + // replace it when it relays the message. + let message_id = format!( + "<{}@{}>", + Alphanumeric.sample_string(&mut rand::thread_rng(), 32), + crate::config::domain_name(), + ); + let email = Message::builder() + .message_id(Some(message_id.clone())) .to(recipient.parse()?) .from(self.sender_address().parse()?) .subject(subject) @@ -106,23 +122,42 @@ or go to https://{domain}/me/pending-invites to manage all of your crate ownersh login, password, } => { - SmtpTransport::relay(server)? - .credentials(Credentials::new(login.clone(), password.clone())) - .authentication(vec![Mechanism::Plain]) - .build() - .send(&email) - .map_err(|_| server_error("Error in sending email"))?; + add_custom_metadata("email_backend", "smtp"); + + SmtpTransport::relay(server) + .and_then(|transport| { + transport + .credentials(Credentials::new(login.clone(), password.clone())) + .authentication(vec![Mechanism::Plain]) + .build() + .send(&email) + }) + .map_err(|e| { + add_custom_metadata("email_error", e); + server_error("Failed to send the email") + })?; + + add_custom_metadata("email_id", message_id); } EmailBackend::FileSystem { path } => { - FileTransport::new(&path) - .send(&email) - .map_err(|_| server_error("Email file could not be generated"))?; + add_custom_metadata("email_backend", "fs"); + + let id = FileTransport::new(&path).send(&email).map_err(|err| { + add_custom_metadata("email_error", err); + server_error("Email file could not be generated") + })?; + + add_custom_metadata("email_path", path.join(format!("{id}.eml")).display()); + } + EmailBackend::Memory { mails } => { + add_custom_metadata("email_backend", "memory"); + + mails.lock().unwrap().push(StoredEmail { + to: recipient.into(), + subject: subject.into(), + body: body.into(), + }); } - EmailBackend::Memory { mails } => mails.lock().unwrap().push(StoredEmail { - to: recipient.into(), - subject: subject.into(), - body: body.into(), - }), } Ok(()) From b7a75f50aceb858fe72184378b41c95baf65f75c Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Tue, 8 Mar 2022 12:06:40 +0100 Subject: [PATCH 3/3] panic if non-smtp backends are used in productions --- src/app.rs | 4 ++-- src/config/base.rs | 2 +- src/email.rs | 8 +++++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/app.rs b/src/app.rs index 9a15e0f1f44..3955363c559 100644 --- a/src/app.rs +++ b/src/app.rs @@ -164,13 +164,13 @@ impl App { read_only_replica_database: replica_database, github, github_oauth, - config, version_id_cacher, downloads_counter: DownloadsCounter::new(), - emails: Emails::from_environment(), + emails: Emails::from_environment(&config), service_metrics: ServiceMetrics::new().expect("could not initialize service metrics"), instance_metrics, http_client, + config, } } diff --git a/src/config/base.rs b/src/config/base.rs index 6952787f60c..e839b627f10 100644 --- a/src/config/base.rs +++ b/src/config/base.rs @@ -12,7 +12,7 @@ use crate::{env, uploaders::Uploader, Env, Replica}; pub struct Base { - pub(super) env: Env, + pub env: Env, uploader: Uploader, } diff --git a/src/email.rs b/src/email.rs index ad0cbd1b670..8d995c82af7 100644 --- a/src/email.rs +++ b/src/email.rs @@ -3,7 +3,9 @@ use std::sync::Mutex; use crate::util::errors::{server_error, AppResult}; +use crate::config; use crate::middleware::log_request::add_custom_metadata; +use crate::Env; use lettre::transport::file::FileTransport; use lettre::transport::smtp::authentication::{Credentials, Mechanism}; use lettre::transport::smtp::SmtpTransport; @@ -18,7 +20,7 @@ pub struct Emails { impl Emails { /// Create a new instance detecting the backend from the environment. This will either connect /// to a SMTP server or store the emails on the local filesystem. - pub fn from_environment() -> Self { + pub fn from_environment(config: &config::Server) -> Self { let backend = match ( dotenv::var("MAILGUN_SMTP_LOGIN"), dotenv::var("MAILGUN_SMTP_PASSWORD"), @@ -34,6 +36,10 @@ impl Emails { }, }; + if config.base.env == Env::Production && !matches!(backend, EmailBackend::Smtp { .. }) { + panic!("only the smtp backend is allowed in production"); + } + Self { backend } }