Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle case mismatches when looking up env vars in the Config snapshot #11824

Merged
merged 7 commits into from Mar 17, 2023
2 changes: 1 addition & 1 deletion src/cargo/util/config/de.rs
Expand Up @@ -424,7 +424,7 @@ impl<'config> ValueDeserializer<'config> {
let definition = {
let env = de.key.as_env_key();
let env_def = Definition::Environment(env.to_string());
match (de.config.env_has_key(env), de.config.get_cv(&de.key)?) {
match (de.config.env.contains_key(env), de.config.get_cv(&de.key)?) {
(true, Some(cv)) => {
// Both, pick highest priority.
if env_def.is_higher_priority(cv.definition()) {
Expand Down
184 changes: 184 additions & 0 deletions src/cargo/util/config/environment.rs
@@ -0,0 +1,184 @@
//! Encapsulates snapshotting of environment variables.

use std::collections::HashMap;
use std::ffi::{OsStr, OsString};

use crate::util::errors::CargoResult;
use anyhow::{anyhow, bail};

/// Generate `case_insensitive_env` and `normalized_env` from the `env`.
fn make_case_insensitive_and_normalized_env(
env: &HashMap<OsString, OsString>,
) -> (HashMap<String, String>, HashMap<String, String>) {
let case_insensitive_env: HashMap<_, _> = env
.keys()
.filter_map(|k| k.to_str())
.map(|k| (k.to_uppercase(), k.to_owned()))
.collect();
let normalized_env = env
.iter()
// Only keep entries where both the key and value are valid UTF-8,
// since the config env vars only support UTF-8 keys and values.
// Otherwise, the normalized map warning could incorrectly warn about entries that can't be
// read by the config system.
// Please see the docs for `Env` for more context.
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
.map(|(k, _)| (k.to_uppercase().replace("-", "_"), k.to_owned()))
.collect();
(case_insensitive_env, normalized_env)
}

/// A snapshot of the environment variables available to [`super::Config`].
///
/// Currently, the [`Config`](super::Config) supports lookup of environment variables
/// through two different means:
///
/// - [`Config::get_env`](super::Config::get_env)
/// and [`Config::get_env_os`](super::Config::get_env_os)
/// for process environment variables (similar to [`std::env::var`] and [`std::env::var_os`]),
/// - Typed Config Value API via [`Config::get`](super::Config::get).
/// This is only available for `CARGO_` prefixed environment keys.
///
/// This type contains the env var snapshot and helper methods for both APIs.
#[derive(Debug)]
pub struct Env {
/// A snapshot of the process's environment variables.
env: HashMap<OsString, OsString>,
/// Used in the typed Config value API for warning messages when config keys are
/// given in the wrong format.
///
/// Maps from "normalized" (upper case and with "-" replaced by "_") env keys
/// to the actual keys in the environment.
/// The normalized format is the one expected by Cargo.
///
/// This only holds env keys that are valid UTF-8, since [`super::ConfigKey`] only supports UTF-8 keys.
/// In addition, this only holds env keys whose value in the environment is also valid UTF-8,
/// since the typed Config value API only supports UTF-8 values.
normalized_env: HashMap<String, String>,
/// Used to implement `get_env` and `get_env_os` on Windows, where env keys are case-insensitive.
///
/// Maps from uppercased env keys to the actual key in the environment.
/// For example, this might hold a pair `("PATH", "Path")`.
/// Currently only supports UTF-8 keys and values.
case_insensitive_env: HashMap<String, String>,
}

impl Env {
/// Create a new `Env` from process's environment variables.
pub fn new() -> Self {
let env: HashMap<_, _> = std::env::vars_os().collect();
let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env);
Self {
env,
case_insensitive_env,
normalized_env,
}
}

/// Set the env directly from a `HashMap`.
/// This should be used for debugging purposes only.
pub(super) fn from_map(env: HashMap<String, String>) -> Self {
let env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect();
let (case_insensitive_env, normalized_env) = make_case_insensitive_and_normalized_env(&env);
Self {
env,
case_insensitive_env,
normalized_env,
}
}

/// Returns all environment variables as an iterator,
/// keeping only entries where both the key and value are valid UTF-8.
pub fn iter_str(&self) -> impl Iterator<Item = (&str, &str)> {
self.env
.iter()
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
}

/// Returns all environment variable keys, filtering out keys that are not valid UTF-8.
pub fn keys_str(&self) -> impl Iterator<Item = &str> {
self.env.keys().filter_map(|k| k.to_str())
}

/// Get the value of environment variable `key` through the `Config` snapshot.
///
/// This can be used similarly to `std::env::var_os`.
/// On Windows, we check for case mismatch since environment keys are case-insensitive.
pub fn get_env_os(&self, key: impl AsRef<OsStr>) -> Option<OsString> {
match self.env.get(key.as_ref()) {
Some(s) => Some(s.clone()),
None => {
if cfg!(windows) {
self.get_env_case_insensitive(key).cloned()
} else {
None
}
}
}
}

/// Get the value of environment variable `key` through the `self.env` snapshot.
///
/// This can be used similarly to `std::env::var`.
/// On Windows, we check for case mismatch since environment keys are case-insensitive.
pub fn get_env(&self, key: impl AsRef<OsStr>) -> CargoResult<String> {
let key = key.as_ref();
let s = self
.get_env_os(key)
.ok_or_else(|| anyhow!("{key:?} could not be found in the environment snapshot"))?;

match s.to_str() {
Some(s) => Ok(s.to_owned()),
None => bail!("environment variable value is not valid unicode: {s:?}"),
}
}

/// Performs a case-insensitive lookup of `key` in the environment.
///
/// This is relevant on Windows, where environment variables are case-insensitive.
/// Note that this only works on keys that are valid UTF-8 and it uses Unicode uppercase,
/// which may differ from the OS's notion of uppercase.
fn get_env_case_insensitive(&self, key: impl AsRef<OsStr>) -> Option<&OsString> {
let upper_case_key = key.as_ref().to_str()?.to_uppercase();
let env_key: &OsStr = self.case_insensitive_env.get(&upper_case_key)?.as_ref();
self.env.get(env_key)
}

/// Get the value of environment variable `key` as a `&str`.
/// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8.
///
/// This is intended for use in private methods of `Config`,
/// and does not check for env key case mismatch.
Comment on lines +150 to +151
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this include a comment explaining why it is case-sensitive?

I was looking over the history, and it looks like this was an unintended regression in 1.28 via #5552. I think I somehow forgot about windows case sensitivity at that time, and have been confused on a few occasions (like #9169) due to the fact that Command used to force keys to be uppercase on Windows (fixed in 1.55 via rust-lang/rust#85270). When launching cargo from within cargo's testsuite, it used Command which ended up mucking with the environment keys.

I don't think this needs to be fixed here in this PR. I don't think anyone has complained about this since then, so maybe it's best just to leave it.

So, a comment could explain something like:

This is case-sensitive on Windows (even though Windows is usually case-insensitive) due to an unintended regression in 1.28 (via #5552). This should only affect keys used for cargo's config-system env variables (CARGO_ prefixed ones) which are currently all uppercase. We may want to consider rectifying it if users report issues. One thing that adds a wrinkle here is the unstable advanced-env option which requires case-sensitive keys.

Do not use this for any other purposes. Use [Env::get_env_os] or [Env::get_env] instead, which properly handle case insensitivity on Windows.

///
/// This is case-sensitive on Windows (even though environment keys on Windows are usually
/// case-insensitive) due to an unintended regression in 1.28 (via #5552).
/// This should only affect keys used for cargo's config-system env variables (`CARGO_`
/// prefixed ones), which are currently all uppercase.
/// We may want to consider rectifying it if users report issues.
/// One thing that adds a wrinkle here is the unstable advanced-env option that *requires*
/// case-sensitive keys.
///
/// Do not use this for any other purposes.
/// Use [`Env::get_env_os`] or [`Env::get_env`] instead, which properly handle case
/// insensitivity on Windows.
pub(super) fn get_str(&self, key: impl AsRef<OsStr>) -> Option<&str> {
self.env.get(key.as_ref()).and_then(|s| s.to_str())
}

/// Check if the environment contains `key`.
///
/// This is intended for use in private methods of `Config`,
/// and does not check for env key case mismatch.
/// See the docstring of [`Env::get_str`] for more context.
pub(super) fn contains_key(&self, key: impl AsRef<OsStr>) -> bool {
self.env.contains_key(key.as_ref())
}

/// Looks up a normalized `key` in the `normalized_env`.
/// Returns the corresponding (non-normalized) env key if it exists, else `None`.
///
/// This is used by [`super::Config::check_environment_key_case_mismatch`].
pub(super) fn get_normalized(&self, key: &str) -> Option<&str> {
self.normalized_env.get(key).map(|s| s.as_ref())
}
}
68 changes: 21 additions & 47 deletions src/cargo/util/config/mod.rs
Expand Up @@ -100,6 +100,9 @@ pub use path::{ConfigRelativePath, PathAndArgs};
mod target;
pub use target::{TargetCfgConfig, TargetConfig};

mod environment;
use environment::Env;

// Helper macro for creating typed access methods.
macro_rules! get_value_typed {
($name:ident, $ty:ty, $variant:ident, $expected:expr) => {
Expand Down Expand Up @@ -199,10 +202,8 @@ pub struct Config {
creation_time: Instant,
/// Target Directory via resolved Cli parameter
target_dir: Option<Filesystem>,
/// Environment variables, separated to assist testing.
env: HashMap<OsString, OsString>,
/// Environment variables, converted to uppercase to check for case mismatch
upper_case_env: HashMap<String, String>,
/// Environment variable snapshot.
env: Env,
/// Tracks which sources have been updated to avoid multiple updates.
updated_sources: LazyCell<RefCell<HashSet<SourceId>>>,
/// Cache of credentials from configuration or credential providers.
Expand Down Expand Up @@ -260,16 +261,10 @@ impl Config {
}
});

let env: HashMap<_, _> = env::vars_os().collect();

let upper_case_env = env
.iter()
.filter_map(|(k, _)| k.to_str()) // Only keep valid UTF-8
.map(|k| (k.to_uppercase().replace("-", "_"), k.to_owned()))
.collect();
let env = Env::new();

let cache_key: &OsStr = "CARGO_CACHE_RUSTC_INFO".as_ref();
let cache_rustc_info = match env.get(cache_key) {
let cache_key = "CARGO_CACHE_RUSTC_INFO";
let cache_rustc_info = match env.get_env_os(cache_key) {
Some(cache) => cache != "0",
_ => true,
};
Expand Down Expand Up @@ -303,7 +298,6 @@ impl Config {
creation_time: Instant::now(),
target_dir: None,
env,
upper_case_env,
updated_sources: LazyCell::new(),
credential_cache: LazyCell::new(),
package_cache_lock: RefCell::new(None),
Expand Down Expand Up @@ -658,7 +652,7 @@ impl Config {
// Root table can't have env value.
return Ok(cv);
}
let env = self.get_env_str(key.as_env_key());
let env = self.env.get_str(key.as_env_key());
let env_def = Definition::Environment(key.as_env_key().to_string());
let use_env = match (&cv, env) {
// Lists are always merged.
Expand Down Expand Up @@ -729,28 +723,26 @@ impl Config {

/// Helper primarily for testing.
pub fn set_env(&mut self, env: HashMap<String, String>) {
self.env = env.into_iter().map(|(k, v)| (k.into(), v.into())).collect();
self.env = Env::from_map(env);
}

/// Returns all environment variables as an iterator, filtering out entries
/// that are not valid UTF-8.
/// Returns all environment variables as an iterator,
/// keeping only entries where both the key and value are valid UTF-8.
pub(crate) fn env(&self) -> impl Iterator<Item = (&str, &str)> {
self.env
.iter()
.filter_map(|(k, v)| Some((k.to_str()?, v.to_str()?)))
self.env.iter_str()
}

/// Returns all environment variable keys, filtering out entries that are not valid UTF-8.
/// Returns all environment variable keys, filtering out keys that are not valid UTF-8.
fn env_keys(&self) -> impl Iterator<Item = &str> {
self.env.iter().filter_map(|(k, _)| k.to_str())
self.env.keys_str()
}

fn get_config_env<T>(&self, key: &ConfigKey) -> Result<OptValue<T>, ConfigError>
where
T: FromStr,
<T as FromStr>::Err: fmt::Display,
{
match self.get_env_str(key.as_env_key()) {
match self.env.get_str(key.as_env_key()) {
Some(value) => {
let definition = Definition::Environment(key.as_env_key().to_string());
Ok(Some(Value {
Expand All @@ -771,39 +763,21 @@ impl Config {
///
/// This can be used similarly to `std::env::var`.
pub fn get_env(&self, key: impl AsRef<OsStr>) -> CargoResult<String> {
let key = key.as_ref();
let s = match self.env.get(key) {
Some(s) => s,
None => bail!("{key:?} could not be found in the environment snapshot",),
};
match s.to_str() {
Some(s) => Ok(s.to_owned()),
None => bail!("environment variable value is not valid unicode: {s:?}"),
}
self.env.get_env(key)
}

/// Get the value of environment variable `key` through the `Config` snapshot.
///
/// This can be used similarly to `std::env::var_os`.
pub fn get_env_os(&self, key: impl AsRef<OsStr>) -> Option<OsString> {
self.env.get(key.as_ref()).cloned()
}

/// Get the value of environment variable `key`.
/// Returns `None` if `key` is not in `self.env` or if the value is not valid UTF-8.
fn get_env_str(&self, key: impl AsRef<OsStr>) -> Option<&str> {
self.env.get(key.as_ref()).and_then(|s| s.to_str())
}

fn env_has_key(&self, key: impl AsRef<OsStr>) -> bool {
self.env.contains_key(key.as_ref())
self.env.get_env_os(key)
}

/// Check if the [`Config`] contains a given [`ConfigKey`].
///
/// See `ConfigMapAccess` for a description of `env_prefix_ok`.
fn has_key(&self, key: &ConfigKey, env_prefix_ok: bool) -> CargoResult<bool> {
if self.env_has_key(key.as_env_key()) {
if self.env.contains_key(key.as_env_key()) {
return Ok(true);
}
if env_prefix_ok {
Expand All @@ -821,7 +795,7 @@ impl Config {
}

fn check_environment_key_case_mismatch(&self, key: &ConfigKey) {
if let Some(env_key) = self.upper_case_env.get(key.as_env_key()) {
if let Some(env_key) = self.env.get_normalized(key.as_env_key()) {
let _ = self.shell().warn(format!(
"Environment variables are expected to use uppercase letters and underscores, \
the variable `{}` will be ignored and have no effect",
Expand Down Expand Up @@ -920,7 +894,7 @@ impl Config {
key: &ConfigKey,
output: &mut Vec<(String, Definition)>,
) -> CargoResult<()> {
let env_val = match self.get_env_str(key.as_env_key()) {
let env_val = match self.env.get_str(key.as_env_key()) {
Some(v) => v,
None => {
self.check_environment_key_case_mismatch(key);
Expand Down
26 changes: 26 additions & 0 deletions tests/testsuite/cargo_command.rs
Expand Up @@ -104,6 +104,32 @@ fn list_command_looks_at_path() {
);
}

#[cfg(windows)]
#[cargo_test]
fn list_command_looks_at_path_case_mismatch() {
let proj = project()
.executable(Path::new("path-test").join("cargo-1"), "")
.build();

let mut path = path();
path.push(proj.root().join("path-test"));
let path = env::join_paths(path.iter()).unwrap();

// See issue #11814: Environment variable names are case-insensitive on Windows.
// We need to check that having "Path" instead of "PATH" is okay.
let output = cargo_process("-v --list")
.env("Path", &path)
.env_remove("PATH")
.exec_with_output()
.unwrap();
let output = str::from_utf8(&output.stdout).unwrap();
assert!(
output.contains("\n 1 "),
"missing 1: {}",
output
);
}

#[cargo_test]
fn list_command_handles_known_external_commands() {
let p = project()
Expand Down