From a02ca0dff51be7348ac3f4b08171dc408f24517e Mon Sep 17 00:00:00 2001 From: bors Date: Fri, 17 Mar 2023 01:06:49 +0000 Subject: [PATCH] Auto merge of #11824 - kylematsuda:windows-config-env-fix, r=ehuss Handle case mismatches when looking up env vars in the Config snapshot ### What does this PR try to resolve? Fixes #11814. Windows environment variables are case-insensitive, which causes problems when looking them up in the `Config` env snapshot. This PR adds another member (`case_insensitive_env`) in `Config` that maps upper-cased keys to their original values in the env (for example, `"PATH" => "Path"`). If lookup in `self.env` fails, this PR converts the key to upper case and looks it up in `self.case_insensitive_env` to obtain the correct key name if it exists (on Windows only). ### How should we test and review this PR? Please see the new tests in `testsuite/config.rs` and `testsuite/cargo_command.rs`. ### Additional information Currently, this uses `str::to_uppercase` to uppercase the keys. This requires key to be valid UTF-8, and may disagree with how the OS uppercases things (see the link in [this comment](https://github.com/rust-lang/cargo/issues/11814#issuecomment-1462870983) for details). --- src/cargo/util/config/de.rs | 2 +- src/cargo/util/config/environment.rs | 184 +++++++++++++++++++++++++++ src/cargo/util/config/mod.rs | 68 +++------- tests/testsuite/cargo_command.rs | 26 ++++ tests/testsuite/config.rs | 25 ++++ 5 files changed, 257 insertions(+), 48 deletions(-) create mode 100644 src/cargo/util/config/environment.rs diff --git a/src/cargo/util/config/de.rs b/src/cargo/util/config/de.rs index ab4dd93b36a..a9147ab0306 100644 --- a/src/cargo/util/config/de.rs +++ b/src/cargo/util/config/de.rs @@ -424,7 +424,7 @@ impl<'config> ValueDeserializer<'config> { let definition = { let env = de.key.as_env_key(); let env_def = Definition::Environment(env.to_string()); - match (de.config.env_has_key(env), de.config.get_cv(&de.key)?) { + match (de.config.env.contains_key(env), de.config.get_cv(&de.key)?) { (true, Some(cv)) => { // Both, pick highest priority. if env_def.is_higher_priority(cv.definition()) { diff --git a/src/cargo/util/config/environment.rs b/src/cargo/util/config/environment.rs new file mode 100644 index 00000000000..bb049546a61 --- /dev/null +++ b/src/cargo/util/config/environment.rs @@ -0,0 +1,184 @@ +//! Encapsulates snapshotting of environment variables. + +use std::collections::HashMap; +use std::ffi::{OsStr, OsString}; + +use crate::util::errors::CargoResult; +use anyhow::{anyhow, bail}; + +/// Generate `case_insensitive_env` and `normalized_env` from the `env`. +fn make_case_insensitive_and_normalized_env( + env: &HashMap, +) -> (HashMap, HashMap) { + let case_insensitive_env: HashMap<_, _> = env + .keys() + .filter_map(|k| k.to_str()) + .map(|k| (k.to_uppercase(), k.to_owned())) + .collect(); + let normalized_env = env + .iter() + // Only keep entries where both the key and value are valid UTF-8, + // since the config env vars only support UTF-8 keys and values. + // Otherwise, the normalized map warning could incorrectly warn about entries that can't be + // read by the config system. + // Please see the docs for `Env` for more context. + .filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?))) + .map(|(k, _)| (k.to_uppercase().replace("-", "_"), k.to_owned())) + .collect(); + (case_insensitive_env, normalized_env) +} + +/// A snapshot of the environment variables available to [`super::Config`]. +/// +/// Currently, the [`Config`](super::Config) supports lookup of environment variables +/// through two different means: +/// +/// - [`Config::get_env`](super::Config::get_env) +/// and [`Config::get_env_os`](super::Config::get_env_os) +/// for process environment variables (similar to [`std::env::var`] and [`std::env::var_os`]), +/// - Typed Config Value API via [`Config::get`](super::Config::get). +/// This is only available for `CARGO_` prefixed environment keys. +/// +/// This type contains the env var snapshot and helper methods for both APIs. +#[derive(Debug)] +pub struct Env { + /// A snapshot of the process's environment variables. + env: HashMap, + /// Used in the typed Config value API for warning messages when config keys are + /// given in the wrong format. + /// + /// Maps from "normalized" (upper case and with "-" replaced by "_") env keys + /// to the actual keys in the environment. + /// The normalized format is the one expected by Cargo. + /// + /// This only holds env keys that are valid UTF-8, since [`super::ConfigKey`] only supports UTF-8 keys. + /// In addition, this only holds env keys whose value in the environment is also valid UTF-8, + /// since the typed Config value API only supports UTF-8 values. + normalized_env: HashMap, + /// Used to implement `get_env` and `get_env_os` on Windows, where env keys are case-insensitive. + /// + /// Maps from uppercased env keys to the actual key in the environment. + /// For example, this might hold a pair `("PATH", "Path")`. + /// Currently only supports UTF-8 keys and values. + case_insensitive_env: HashMap, +} + +impl Env { + /// Create a new `Env` from process's environment variables. + pub fn new() -> Self { + let env: HashMap<_, _> = std::env::vars_os().collect(); + let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env); + Self { + env, + case_insensitive_env, + normalized_env, + } + } + + /// Set the env directly from a `HashMap`. + /// This should be used for debugging purposes only. + pub(super) fn from_map(env: HashMap) -> Self { + let env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect(); + let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env); + Self { + env, + case_insensitive_env, + normalized_env, + } + } + + /// Returns all environment variables as an iterator, + /// keeping only entries where both the key and value are valid UTF-8. + pub fn iter_str(&self) -> impl Iterator { + self.env + .iter() + .filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?))) + } + + /// Returns all environment variable keys, filtering out keys that are not valid UTF-8. + pub fn keys_str(&self) -> impl Iterator { + self.env.keys().filter_map(|k| k.to_str()) + } + + /// Get the value of environment variable `key` through the `Config` snapshot. + /// + /// This can be used similarly to `std::env::var_os`. + /// On Windows, we check for case mismatch since environment keys are case-insensitive. + pub fn get_env_os(&self, key: impl AsRef) -> Option { + match self.env.get(key.as_ref()) { + Some(s) => Some(s.clone()), + None => { + if cfg!(windows) { + self.get_env_case_insensitive(key).cloned() + } else { + None + } + } + } + } + + /// Get the value of environment variable `key` through the `self.env` snapshot. + /// + /// This can be used similarly to `std::env::var`. + /// On Windows, we check for case mismatch since environment keys are case-insensitive. + pub fn get_env(&self, key: impl AsRef) -> CargoResult { + let key = key.as_ref(); + let s = self + .get_env_os(key) + .ok_or_else(|| anyhow!("{key:?} could not be found in the environment snapshot"))?; + + match s.to_str() { + Some(s) => Ok(s.to_owned()), + None => bail!("environment variable value is not valid unicode: {s:?}"), + } + } + + /// Performs a case-insensitive lookup of `key` in the environment. + /// + /// This is relevant on Windows, where environment variables are case-insensitive. + /// Note that this only works on keys that are valid UTF-8 and it uses Unicode uppercase, + /// which may differ from the OS's notion of uppercase. + fn get_env_case_insensitive(&self, key: impl AsRef) -> Option<&OsString> { + let upper_case_key = key.as_ref().to_str()?.to_uppercase(); + let env_key: &OsStr = self.case_insensitive_env.get(&upper_case_key)?.as_ref(); + self.env.get(env_key) + } + + /// Get the value of environment variable `key` as a `&str`. + /// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8. + /// + /// This is intended for use in private methods of `Config`, + /// and does not check for env key case mismatch. + /// + /// This is case-sensitive on Windows (even though environment keys on Windows are usually + /// case-insensitive) due to an unintended regression in 1.28 (via #5552). + /// This should only affect keys used for cargo's config-system env variables (`CARGO_` + /// prefixed ones), which are currently all uppercase. + /// We may want to consider rectifying it if users report issues. + /// One thing that adds a wrinkle here is the unstable advanced-env option that *requires* + /// case-sensitive keys. + /// + /// Do not use this for any other purposes. + /// Use [`Env::get_env_os`] or [`Env::get_env`] instead, which properly handle case + /// insensitivity on Windows. + pub(super) fn get_str(&self, key: impl AsRef) -> Option<&str> { + self.env.get(key.as_ref()).and_then(|s| s.to_str()) + } + + /// Check if the environment contains `key`. + /// + /// This is intended for use in private methods of `Config`, + /// and does not check for env key case mismatch. + /// See the docstring of [`Env::get_str`] for more context. + pub(super) fn contains_key(&self, key: impl AsRef) -> bool { + self.env.contains_key(key.as_ref()) + } + + /// Looks up a normalized `key` in the `normalized_env`. + /// Returns the corresponding (non-normalized) env key if it exists, else `None`. + /// + /// This is used by [`super::Config::check_environment_key_case_mismatch`]. + pub(super) fn get_normalized(&self, key: &str) -> Option<&str> { + self.normalized_env.get(key).map(|s| s.as_ref()) + } +} diff --git a/src/cargo/util/config/mod.rs b/src/cargo/util/config/mod.rs index b5c781efddd..22915918270 100644 --- a/src/cargo/util/config/mod.rs +++ b/src/cargo/util/config/mod.rs @@ -100,6 +100,9 @@ pub use path::{ConfigRelativePath, PathAndArgs}; mod target; pub use target::{TargetCfgConfig, TargetConfig}; +mod environment; +use environment::Env; + // Helper macro for creating typed access methods. macro_rules! get_value_typed { ($name:ident, $ty:ty, $variant:ident, $expected:expr) => { @@ -199,10 +202,8 @@ pub struct Config { creation_time: Instant, /// Target Directory via resolved Cli parameter target_dir: Option, - /// Environment variables, separated to assist testing. - env: HashMap, - /// Environment variables, converted to uppercase to check for case mismatch - upper_case_env: HashMap, + /// Environment variable snapshot. + env: Env, /// Tracks which sources have been updated to avoid multiple updates. updated_sources: LazyCell>>, /// Cache of credentials from configuration or credential providers. @@ -260,16 +261,10 @@ impl Config { } }); - let env: HashMap<_, _> = env::vars_os().collect(); - - let upper_case_env = env - .iter() - .filter_map(|(k, _)| k.to_str()) // Only keep valid UTF-8 - .map(|k| (k.to_uppercase().replace("-", "_"), k.to_owned())) - .collect(); + let env = Env::new(); - let cache_key: &OsStr = "CARGO_CACHE_RUSTC_INFO".as_ref(); - let cache_rustc_info = match env.get(cache_key) { + let cache_key = "CARGO_CACHE_RUSTC_INFO"; + let cache_rustc_info = match env.get_env_os(cache_key) { Some(cache) => cache != "0", _ => true, }; @@ -303,7 +298,6 @@ impl Config { creation_time: Instant::now(), target_dir: None, env, - upper_case_env, updated_sources: LazyCell::new(), credential_cache: LazyCell::new(), package_cache_lock: RefCell::new(None), @@ -658,7 +652,7 @@ impl Config { // Root table can't have env value. return Ok(cv); } - let env = self.get_env_str(key.as_env_key()); + let env = self.env.get_str(key.as_env_key()); let env_def = Definition::Environment(key.as_env_key().to_string()); let use_env = match (&cv, env) { // Lists are always merged. @@ -729,20 +723,18 @@ impl Config { /// Helper primarily for testing. pub fn set_env(&mut self, env: HashMap) { - self.env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect(); + self.env = Env::from_map(env); } - /// Returns all environment variables as an iterator, filtering out entries - /// that are not valid UTF-8. + /// Returns all environment variables as an iterator, + /// keeping only entries where both the key and value are valid UTF-8. pub(crate) fn env(&self) -> impl Iterator { - self.env - .iter() - .filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?))) + self.env.iter_str() } - /// Returns all environment variable keys, filtering out entries that are not valid UTF-8. + /// Returns all environment variable keys, filtering out keys that are not valid UTF-8. fn env_keys(&self) -> impl Iterator { - self.env.iter().filter_map(|(k, _)| k.to_str()) + self.env.keys_str() } fn get_config_env(&self, key: &ConfigKey) -> Result, ConfigError> @@ -750,7 +742,7 @@ impl Config { T: FromStr, ::Err: fmt::Display, { - match self.get_env_str(key.as_env_key()) { + match self.env.get_str(key.as_env_key()) { Some(value) => { let definition = Definition::Environment(key.as_env_key().to_string()); Ok(Some(Value { @@ -771,39 +763,21 @@ impl Config { /// /// This can be used similarly to `std::env::var`. pub fn get_env(&self, key: impl AsRef) -> CargoResult { - let key = key.as_ref(); - let s = match self.env.get(key) { - Some(s) => s, - None => bail!("{key:?} could not be found in the environment snapshot",), - }; - match s.to_str() { - Some(s) => Ok(s.to_owned()), - None => bail!("environment variable value is not valid unicode: {s:?}"), - } + self.env.get_env(key) } /// Get the value of environment variable `key` through the `Config` snapshot. /// /// This can be used similarly to `std::env::var_os`. pub fn get_env_os(&self, key: impl AsRef) -> Option { - self.env.get(key.as_ref()).cloned() - } - - /// Get the value of environment variable `key`. - /// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8. - fn get_env_str(&self, key: impl AsRef) -> Option<&str> { - self.env.get(key.as_ref()).and_then(|s| s.to_str()) - } - - fn env_has_key(&self, key: impl AsRef) -> bool { - self.env.contains_key(key.as_ref()) + self.env.get_env_os(key) } /// Check if the [`Config`] contains a given [`ConfigKey`]. /// /// See `ConfigMapAccess` for a description of `env_prefix_ok`. fn has_key(&self, key: &ConfigKey, env_prefix_ok: bool) -> CargoResult { - if self.env_has_key(key.as_env_key()) { + if self.env.contains_key(key.as_env_key()) { return Ok(true); } if env_prefix_ok { @@ -821,7 +795,7 @@ impl Config { } fn check_environment_key_case_mismatch(&self, key: &ConfigKey) { - if let Some(env_key) = self.upper_case_env.get(key.as_env_key()) { + if let Some(env_key) = self.env.get_normalized(key.as_env_key()) { let _ = self.shell().warn(format!( "Environment variables are expected to use uppercase letters and underscores, \ the variable `{}` will be ignored and have no effect", @@ -920,7 +894,7 @@ impl Config { key: &ConfigKey, output: &mut Vec<(String, Definition)>, ) -> CargoResult<()> { - let env_val = match self.get_env_str(key.as_env_key()) { + let env_val = match self.env.get_str(key.as_env_key()) { Some(v) => v, None => { self.check_environment_key_case_mismatch(key); diff --git a/tests/testsuite/cargo_command.rs b/tests/testsuite/cargo_command.rs index f07cec332a6..62869387f2a 100644 --- a/tests/testsuite/cargo_command.rs +++ b/tests/testsuite/cargo_command.rs @@ -104,6 +104,32 @@ fn list_command_looks_at_path() { ); } +#[cfg(windows)] +#[cargo_test] +fn list_command_looks_at_path_case_mismatch() { + let proj = project() + .executable(Path::new("path-test").join("cargo-1"), "") + .build(); + + let mut path = path(); + path.push(proj.root().join("path-test")); + let path = env::join_paths(path.iter()).unwrap(); + + // See issue #11814: Environment variable names are case-insensitive on Windows. + // We need to check that having "Path" instead of "PATH" is okay. + let output = cargo_process("-v --list") + .env("Path", &path) + .env_remove("PATH") + .exec_with_output() + .unwrap(); + let output = str::from_utf8(&output.stdout).unwrap(); + assert!( + output.contains("\n 1 "), + "missing 1: {}", + output + ); +} + #[cargo_test] fn list_command_handles_known_external_commands() { let p = project() diff --git a/tests/testsuite/config.rs b/tests/testsuite/config.rs index 0835c03070e..92e1f42643c 100644 --- a/tests/testsuite/config.rs +++ b/tests/testsuite/config.rs @@ -211,6 +211,31 @@ f1 = 123 assert_eq!(s, S { f1: Some(456) }); } +#[cfg(windows)] +#[cargo_test] +fn environment_variable_casing() { + // Issue #11814: Environment variable names are case-insensitive on Windows. + let config = ConfigBuilder::new() + .env("Path", "abc") + .env("Two-Words", "abc") + .env("two_words", "def") + .build(); + + let var = config.get_env("PATH").unwrap(); + assert_eq!(var, String::from("abc")); + + let var = config.get_env("path").unwrap(); + assert_eq!(var, String::from("abc")); + + let var = config.get_env("TWO-WORDS").unwrap(); + assert_eq!(var, String::from("abc")); + + // Make sure that we can still distinguish between dashes and underscores + // in variable names. + let var = config.get_env("Two_Words").unwrap(); + assert_eq!(var, String::from("def")); +} + #[cargo_test] fn config_works_with_extension() { write_config_toml(