Skip to content

Commit

Permalink
Merge pull request #858 from koordinates/env-overrides
Browse files Browse the repository at this point in the history
Refactor: simplify env-overrides for subprocess calls
  • Loading branch information
olsen232 committed Jun 6, 2023
2 parents a6790a1 + 10dd9f9 commit a775242
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 37 deletions.
6 changes: 2 additions & 4 deletions kart/point_cloud/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@ def pdal_execute_pipeline(pipeline, *, env_overrides=None):
Returns a list of metadata output from each stage.
"""

env = subprocess.tool_environment()
# NOTE: Kart itself doesn't currently use env_overrides, but don't remove it.
# PDAL uses environment variables for various purposes, and this is helpful for `kart ext-run` scripts.
env.update(env_overrides or {})

if is_windows:
# On windows we can't keep the metadata file open while pdal writes to it:
Expand All @@ -28,7 +26,7 @@ def pdal_execute_pipeline(pipeline, *, env_overrides=None):
input=json.dumps(pipeline),
encoding="utf-8",
capture_output=True,
env=env,
env_overrides=env_overrides,
)

with metadata_path.open(encoding="utf-8") as f:
Expand All @@ -46,7 +44,7 @@ def pdal_execute_pipeline(pipeline, *, env_overrides=None):
input=json.dumps(pipeline),
encoding="utf-8",
capture_output=True,
env=env,
env_overrides=env_overrides,
)
f_metadata.seek(0)
metadata = json.load(f_metadata)
Expand Down
8 changes: 4 additions & 4 deletions kart/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,21 +737,21 @@ def _signature(self, person_type, **overrides):
# 'git var' lets us use the environment variables to
# control the user info, e.g. GIT_AUTHOR_DATE.
# libgit2/pygit2 doesn't handle those env vars at all :(
env = os.environ.copy()
env_overrides = {}

name = overrides.pop("name", None)
if name is not None:
env[f"GIT_{person_type}_NAME"] = name
env_overrides[f"GIT_{person_type}_NAME"] = name

email = overrides.pop("email", None)
if email is not None:
env[f"GIT_{person_type}_EMAIL"] = email
env_overrides[f"GIT_{person_type}_EMAIL"] = email

output = subprocess.check_output(
["git", "var", f"GIT_{person_type}_IDENT"],
cwd=self.path,
encoding="utf8",
env=subprocess.tool_environment(env),
env_overrides=env_overrides,
)
m = self._GIT_VAR_OUTPUT_RE.match(output)
kwargs = m.groupdict()
Expand Down
30 changes: 23 additions & 7 deletions kart/subprocess_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,18 @@ def Popen(cmd, **kwargs):


def add_default_kwargs(kwargs_dict, check_output=False):
# Set up the environment if not supplied by the caller.
if "env" not in kwargs_dict:
kwargs_dict.setdefault("env", tool_environment())
# We could allow the caller to supply the env, but env_overrides is generally more useful.
# You can disable this assert if you are sure you need this (and not env_overrides).
assert "env" not in kwargs_dict

# Specifically set sys.stdin, sys.stderr, sys.stdout if this is running via helper mode.
if "env_overrides" in kwargs_dict:
env = tool_environment(env_overrides=kwargs_dict.pop("env_overrides"))
else:
env = tool_environment()

kwargs_dict["env"] = env

# Explicitly set sys.stdin, sys.stderr, sys.stdout if this is running via helper mode.
# See the explanation for why we need this at the top of the file.
if os.environ.get("KART_HELPER_PID"):
if "input" not in kwargs_dict:
Expand Down Expand Up @@ -282,15 +289,21 @@ def _run_with_capture_then_exit(cmd):
sys.exit(p.returncode)


def tool_environment(env=None):
"""Returns a dict of environment for launching an external process."""
def tool_environment(*, base_env=None, env_overrides=None):
"""
Returns a dict of environment variables for launching an external process.
Sets the PATH, GIT_CONFIG_PARAMETERS, etc appropriately.
base_env - the environment to start from, defaults to os.environ if not set.
env_overrides - any extra environment variables to add after the tool-environment
"""
# Adds our GIT_CONFIG_PARAMETERS to os.environ, so that if we launch git as a subprocess, it will have the config we want.
# TODO - a bit strange that this function modifies both the actual os.environ and the tool_environment that generally inherits
# from it - we should stick to modifying one or the other.
init_git_config()
# TODO - tool_environment would be easier to use if you could supply it with a dict of extra environment variables that
# you need, instead of having to supply the entire env.
env = (env or os.environ).copy()
env = (base_env or os.environ).copy()

# Add kart bin directory to the start of the path:
kart_bin_path = str(Path(sys.executable).parents[0])
Expand All @@ -305,6 +318,9 @@ def tool_environment(env=None):
env["LD_LIBRARY_PATH"] = env["LD_LIBRARY_PATH_ORIG"]
else:
env.pop("LD_LIBRARY_PATH", None)

if env_overrides:
env.update(env_overrides)
return env


Expand Down
23 changes: 13 additions & 10 deletions kart/workdir.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,8 +668,7 @@ def path(ds_or_path):

paths = [path(d) for d in datasets]

env = subprocess.tool_environment()
env["GIT_INDEX_FILE"] = str(self.index_path)
env_overrides = {"GIT_INDEX_FILE": str(self.index_path)}

try:
# Use Git to figure out which files in the in the index need to be updated.
Expand All @@ -684,7 +683,9 @@ def path(ds_or_path):
*paths,
]
output_lines = (
subprocess.check_output(cmd, env=env, encoding="utf-8", cwd=self.path)
subprocess.check_output(
cmd, env_overrides=env_overrides, encoding="utf-8", cwd=self.path
)
.strip()
.splitlines()
)
Expand Down Expand Up @@ -727,15 +728,14 @@ def _reset_workdir_index_for_files(self, file_paths):
# the file's `stat` information - this allows for an optimisation where diffs can be generated
# without hashing the working copy files. pygit2.Index doesn't give easy access to this info.

env = subprocess.tool_environment()
env["GIT_INDEX_FILE"] = str(self.index_path)
env_overrides = {"GIT_INDEX_FILE": str(self.index_path)}

try:
cmd = ["git", "update-index", "--add", "--remove", "-z", "--stdin"]
subprocess.run(
cmd,
check=True,
env=env,
env_overrides=env_overrides,
cwd=self.path,
input="\0".join(file_paths),
encoding="utf-8",
Expand Down Expand Up @@ -852,22 +852,25 @@ def write_mosaic_for_dataset(self, dataset):
dataset.write_mosaic_for_directory((self.path / dataset.path).resolve())

def dirty_paths(self):
env = subprocess.tool_environment()
env["GIT_INDEX_FILE"] = str(self.index_path)
env_overrides = {"GIT_INDEX_FILE": str(self.index_path)}

try:
# This finds all files in the index that have been modified - and updates any mtimes in the index
# if the mtimes are stale but the files are actually unchanged (as in GIT_DIFF_UPDATE_INDEX).
cmd = ["git", "diff", "--name-only"]
output_lines = (
subprocess.check_output(cmd, env=env, encoding="utf-8", cwd=self.path)
subprocess.check_output(
cmd, env_overrides=env_overrides, encoding="utf-8", cwd=self.path
)
.strip()
.splitlines()
)
# This finds all untracked files that are not in the index.
cmd = ["git", "ls-files", "--others", "--exclude-standard"]
output_lines += (
subprocess.check_output(cmd, env=env, encoding="utf-8", cwd=self.path)
subprocess.check_output(
cmd, env_overrides=env_overrides, encoding="utf-8", cwd=self.path
)
.strip()
.splitlines()
)
Expand Down
8 changes: 5 additions & 3 deletions tests/point_cloud/test_workingcopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,16 +979,18 @@ def test_working_copy_mtime_updated(cli_runner, data_archive, requires_pdal):
assert r.stdout.splitlines()[-1] == "Nothing to commit, working copy clean"

repo = KartRepo(repo_path)
env = subprocess.tool_environment()
env["GIT_INDEX_FILE"] = str(repo.working_copy.workdir.index_path)
env_overrides = {"GIT_INDEX_FILE": str(repo.working_copy.workdir.index_path)}

def get_touched_files():
# git diff-files never compares OIDs - it just lists files which appear
# to be dirty based on a different mtime to the mtime in the index.
cmd = ["git", "diff-files"]
return (
subprocess.check_output(
cmd, env=env, encoding="utf-8", cwd=repo.workdir_path
cmd,
env_overrides=env_overrides,
encoding="utf-8",
cwd=repo.workdir_path,
)
.strip()
.splitlines()
Expand Down
22 changes: 13 additions & 9 deletions tests/test_subprocess_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,22 @@ def test_subprocess_tool_environment():
assert sys.executable.startswith(env_exec["PATH"].split(os.pathsep)[0])

if platform.system() == "Linux":
env_in = {"LD_LIBRARY_PATH": "bob", "LD_LIBRARY_PATH_ORIG": "alex", "my": "me"}
env_exec = subprocess_util.tool_environment(env_in)
assert env_exec is not env_in
base_env = {
"LD_LIBRARY_PATH": "bob",
"LD_LIBRARY_PATH_ORIG": "alex",
"my": "me",
}
env_exec = subprocess_util.tool_environment(base_env=base_env)
assert env_exec is not base_env
assert env_exec["LD_LIBRARY_PATH"] == "alex"
assert env_exec["my"] == "me"

env_in = {"LD_LIBRARY_PATH": "bob", "my": "me"}
env_exec = subprocess_util.tool_environment(env_in)
base_env = {"LD_LIBRARY_PATH": "bob", "my": "me"}
env_exec = subprocess_util.tool_environment(base_env=base_env)
assert "LD_LIBRARY_PATH" not in env_exec
else:
env_in = {"my": "me"}
env_exec = subprocess_util.tool_environment(env_in)
assert env_exec is not env_in
base_env = {"my": "me"}
env_exec = subprocess_util.tool_environment(base_env=base_env)
assert env_exec is not base_env
env_exec.pop("PATH", None)
assert env_exec == env_in
assert env_exec == base_env

0 comments on commit a775242

Please sign in to comment.