From a76b7e56e26a57adc10ab75f95e66e48848e32d0 Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Thu, 4 Sep 2025 19:21:48 +0100 Subject: [PATCH 1/2] [update-checkout] reduce the script's default verbosity --- .../update_checkout/__init__.py | 1 - .../update_checkout/parallel_runner.py | 129 ++++++++++++++++++ .../update_checkout/update_checkout.py | 121 ++++++---------- 3 files changed, 169 insertions(+), 82 deletions(-) create mode 100644 utils/update_checkout/update_checkout/parallel_runner.py diff --git a/utils/update_checkout/update_checkout/__init__.py b/utils/update_checkout/update_checkout/__init__.py index 99cc1c7c3d1d1..7a4ee69672705 100644 --- a/utils/update_checkout/update_checkout/__init__.py +++ b/utils/update_checkout/update_checkout/__init__.py @@ -1,4 +1,3 @@ - from .update_checkout import main __all__ = ["main"] diff --git a/utils/update_checkout/update_checkout/parallel_runner.py b/utils/update_checkout/update_checkout/parallel_runner.py new file mode 100644 index 0000000000000..4b0810a205da2 --- /dev/null +++ b/utils/update_checkout/update_checkout/parallel_runner.py @@ -0,0 +1,129 @@ +from multiprocessing.managers import ListProxy, ValueProxy +import sys +from multiprocessing import Pool, cpu_count, Manager +import time +from typing import Callable, List, Any +from threading import Thread, Event, Lock +import shutil + +class MonitoredFunction: + def __init__(self, fn: Callable, running_tasks: ListProxy, updated_repos: ValueProxy, lock: Lock): + self.fn = fn + self.running_tasks = running_tasks + self.updated_repos = updated_repos + self._lock = lock + + def __call__(self, *args): + task_name = args[0][2] + self.running_tasks.append(task_name) + try: + return self.fn(*args) + finally: + self._lock.acquire() + self.running_tasks.remove(task_name) + self.updated_repos.set(self.updated_repos.get() + 1) + self._lock.release() + + +class ParallelRunner: + def __init__(self, fn: Callable, pool_args: List[List[Any]], n_processes: int = 0): + self._monitor_polling_period = 0.1 + if n_processes == 0: + n_processes = cpu_count() * 2 + self._terminal_width = shutil.get_terminal_size().columns + self._n_processes = n_processes + self._pool_args = pool_args + self._fn = fn + self._lock = Manager().Lock() + self._pool = Pool( + processes=self._n_processes, initializer=self._child_init, initargs=(self._lock,) + ) + self._verbose = pool_args[0][len(pool_args[0]) - 1] + self._nb_repos = len(pool_args) + self._stop_event = Event() + self._running_tasks = Manager().list() + self._updated_repos = Manager().Value('i', 0) + self._monitored_fn = MonitoredFunction(self._fn, self._running_tasks, self._updated_repos, self._lock) + + def run(self) -> List[Any]: + print( + "Running ``%s`` with up to %d processes." 
+ % (self._fn.__name__, self._n_processes) + ) + + if self._verbose: + results = self._pool.map_async( + func=self._fn, iterable=self._pool_args + ).get() + self._pool.close() + self._pool.join() + else: + monitor_thread = Thread(target=self._monitor, daemon=True) + monitor_thread.start() + results = self._pool.map_async( + func=self._monitored_fn, iterable=self._pool_args + ).get() + self._pool.close() + self._pool.join() + self._stop_event.set() + monitor_thread.join() + return results + + def _monitor(self): + last_output = "" + while not self._stop_event.is_set(): + current = list(self._running_tasks) + current_line = ", ".join(current) + + if current_line != last_output: + truncated = f"Updating [{self._updated_repos.get()}/{self._nb_repos}] ({current_line})" + if len(truncated) > self._terminal_width: + ellipsis_marker = " ..." + truncated = ( + truncated[: self._terminal_width - len(ellipsis_marker)] + + ellipsis_marker + ) + sys.stdout.write("\r" + truncated.ljust(self._terminal_width)) + sys.stdout.flush() + last_output = current_line + + time.sleep(self._monitor_polling_period) + + sys.stdout.write("\r" + " " * len(last_output) + "\r") + sys.stdout.flush() + + @staticmethod + def _clear_lines(n): + for _ in range(n): + sys.stdout.write("\x1b[1A") + sys.stdout.write("\x1b[2K") + + @staticmethod + def check_results(results, op): + """Function used to check the results of ParallelRunner. + + NOTE: This function was originally located in the shell module of + swift_build_support and should eventually be replaced with a better + parallel implementation. + """ + + fail_count = 0 + if results is None: + return 0 + for r in results: + if r is not None: + if fail_count == 0: + print("======%s FAILURES======" % op) + fail_count += 1 + if isinstance(r, str): + print(r) + continue + print("%s failed (ret=%d): %s" % (r.repo_path, r.ret, r)) + if r.stderr: + print(r.stderr.decode()) + return fail_count + + @staticmethod + def _child_init(lck): + global lock + lock = lck diff --git a/utils/update_checkout/update_checkout/update_checkout.py b/utils/update_checkout/update_checkout/update_checkout.py index c83e1cb24ea0d..25fd5cdbb812c 100755 --- a/utils/update_checkout/update_checkout/update_checkout.py +++ b/utils/update_checkout/update_checkout/update_checkout.py @@ -15,11 +15,12 @@ import re import sys import traceback -from multiprocessing import Lock, Pool, cpu_count, freeze_support +from multiprocessing import freeze_support from typing import Optional, Set, List, Any from build_swift.build_swift.constants import SWIFT_SOURCE_ROOT +from .parallel_runner import ParallelRunner from swift_build_support.swift_build_support import shell @@ -27,57 +28,6 @@ SCRIPT_DIR = os.path.dirname(SCRIPT_FILE) -def child_init(lck): - global lock - lock = lck - - -def run_parallel(fn, pool_args, n_processes=0): - """Function used to run a given closure in parallel. - - NOTE: This function was originally located in the shell module of - swift_build_support and should eventually be replaced with a better - parallel implementation. - """ - - if n_processes == 0: - n_processes = cpu_count() * 2 - - lk = Lock() - print("Running ``%s`` with up to %d processes." % - (fn.__name__, n_processes)) - pool = Pool(processes=n_processes, initializer=child_init, initargs=(lk,)) - results = pool.map_async(func=fn, iterable=pool_args).get(999999) - pool.close() - pool.join() - return results - - -def check_parallel_results(results, op): - """Function used to check the results of run_parallel. 
- - NOTE: This function was originally located in the shell module of - swift_build_support and should eventually be replaced with a better - parallel implementation. - """ - - fail_count = 0 - if results is None: - return 0 - for r in results: - if r is not None: - if fail_count == 0: - print("======%s FAILURES======" % op) - fail_count += 1 - if isinstance(r, str): - print(r) - continue - print("%s failed (ret=%d): %s" % (r.repo_path, r.ret, r)) - if r.stderr: - print(r.stderr) - return fail_count - - def confirm_tag_in_repo(tag, repo_name) -> Optional[str]: # type: (str, str) -> str | None """Confirm that a given tag exists in a git repository. This function @@ -159,16 +109,19 @@ def get_branch_for_repo(config, repo_name, scheme_name, scheme_map, def update_single_repository(pool_args): source_root, config, repo_name, scheme_name, scheme_map, tag, timestamp, \ - reset_to_remote, should_clean, should_stash, cross_repos_pr = pool_args + reset_to_remote, should_clean, should_stash, cross_repos_pr, verbose = \ + pool_args + repo_path = os.path.join(source_root, repo_name) if not os.path.isdir(repo_path) or os.path.islink(repo_path): return try: prefix = "[{0}] ".format(os.path.basename(repo_path)).ljust(40) - print(prefix + "Updating '" + repo_path + "'") + if verbose: + print(prefix + "Updating '" + repo_path + "'") - with shell.pushd(repo_path, dry_run=False, echo=False): + with shell.pushd(repo_path, dry_run=False, echo=verbose): cross_repo = False checkout_target = None if tag: @@ -189,10 +142,10 @@ def update_single_repository(pool_args): if should_clean or should_stash: def run_for_repo_and_each_submodule_rec(cmd): - shell.run(cmd, echo=True, prefix=prefix) + shell.run(cmd, echo=verbose, prefix=prefix) shell.run( ["git", "submodule", "foreach", "--recursive"] + cmd, - echo=True, + echo=verbose, prefix=prefix, ) @@ -212,7 +165,7 @@ def run_for_repo_and_each_submodule_rec(cmd): # It is possible to reset --hard and still be mid-rebase. try: shell.run(['git', 'rebase', '--abort'], - echo=True, prefix=prefix) + echo=verbose, prefix=prefix) except Exception: pass @@ -229,32 +182,32 @@ def run_for_repo_and_each_submodule_rec(cmd): except Exception: shell.run(["git", "fetch", "--recurse-submodules=yes", "--tags"], - echo=True, prefix=prefix) + echo=verbose, prefix=prefix) try: shell.run(['git', 'checkout', checkout_target], - echo=True, prefix=prefix) - except Exception as originalException: + echo=verbose, prefix=prefix) + except Exception: try: result = shell.run(['git', 'rev-parse', checkout_target]) revision = result[0].strip() shell.run(['git', 'checkout', revision], - echo=True, prefix=prefix) + echo=verbose, prefix=prefix) except Exception: - raise originalException + raise # It's important that we checkout, fetch, and rebase, in order. # .git/FETCH_HEAD updates the not-for-merge attributes based on # which branch was checked out during the fetch. shell.run(["git", "fetch", "--recurse-submodules=yes", "--tags"], - echo=True, prefix=prefix) + echo=verbose, prefix=prefix) # If we were asked to reset to the specified branch, do the hard # reset and return. 
if checkout_target and reset_to_remote and not cross_repo: full_target = full_target_name('origin', checkout_target) shell.run(['git', 'reset', '--hard', full_target], - echo=True, prefix=prefix) + echo=verbose, prefix=prefix) return # Query whether we have a "detached HEAD", which will mean that @@ -282,17 +235,18 @@ def run_for_repo_and_each_submodule_rec(cmd): # http://stackoverflow.com/a/30209750/125349 if not cross_repo and not detached_head: shell.run(["git", "rebase", "FETCH_HEAD"], - echo=True, prefix=prefix) - elif detached_head: + echo=verbose, prefix=prefix) + elif detached_head and verbose: print(prefix + "Detached HEAD; probably checked out a tag. No need " "to rebase.") shell.run(["git", "submodule", "update", "--recursive"], - echo=True, prefix=prefix) + echo=verbose, prefix=prefix) except Exception: (type, value, tb) = sys.exc_info() - print('Error on repo "%s": %s' % (repo_path, traceback.format_exc())) + if verbose: + print('Error on repo "%s": %s' % (repo_path, traceback.format_exc())) return value @@ -395,7 +349,8 @@ def update_all_repositories(args, config, scheme_name, scheme_map, cross_repos_p args.reset_to_remote, args.clean, args.stash, - cross_repos_pr] + cross_repos_pr, + args.verbose] pool_args.append(my_args) locked_repositories: set[str] = _is_any_repository_locked(pool_args) @@ -404,12 +359,12 @@ def update_all_repositories(args, config, scheme_name, scheme_map, cross_repos_p f"'{repo_name}' is locked by git. Cannot update it." for repo_name in locked_repositories ] - return run_parallel(update_single_repository, pool_args, args.n_processes) + return ParallelRunner(update_single_repository, pool_args, args.n_processes).run() def obtain_additional_swift_sources(pool_args): (args, repo_name, repo_info, repo_branch, remote, with_ssh, scheme_name, - skip_history, skip_tags, skip_repository_list, use_submodules) = pool_args + skip_history, skip_tags, skip_repository_list, use_submodules, verbose) = pool_args env = dict(os.environ) env.update({'GIT_TERMINAL_PROMPT': '0'}) @@ -423,17 +378,17 @@ def obtain_additional_swift_sources(pool_args): '--branch', repo_branch, remote, repo_name] + (['--no-tags'] if skip_tags else []), env=env, - echo=True) + echo=verbose) elif use_submodules: shell.run(['git', 'submodule', 'add', remote, repo_name] + (['--no-tags'] if skip_tags else []), env=env, - echo=True) + echo=verbose) else: shell.run(['git', 'clone', '--recursive', remote, repo_name] + (['--no-tags'] if skip_tags else []), env=env, - echo=True) + echo=verbose) if scheme_name: src_path = os.path.join(args.source_root, repo_name, ".git") shell.run(['git', '--git-dir', @@ -512,22 +467,22 @@ def obtain_all_additional_swift_sources(args, config, with_ssh, scheme_name, new_args = [args, repo_name, repo_info, repo_branch, remote, with_ssh, scheme_name, skip_history, skip_tags, - skip_repository_list, use_submodules] + skip_repository_list, use_submodules, args.verbose] if use_submodules: obtain_additional_swift_sources(new_args) else: pool_args.append(new_args) - # Only use `run_parallel` when submodules are not used, since `.git` dir + # Only use `ParallelRunner` when submodules are not used, since `.git` dir # can't be accessed concurrently. 
if not use_submodules:
         if not pool_args:
             print("Not cloning any repositories.")
             return
 
-        return run_parallel(
-            obtain_additional_swift_sources, pool_args, args.n_processes)
+        return ParallelRunner(
+            obtain_additional_swift_sources, pool_args, args.n_processes).run()
 
 
 def dump_repo_hashes(args, config, branch_scheme_name='repro'):
@@ -781,6 +736,10 @@ def main():
         "--use-submodules",
         help="Checkout repositories as git submodules.",
         action='store_true')
+    parser.add_argument(
+        "-v", "--verbose",
+        help="Increase the script's verbosity.",
+        action='store_true')
 
     args = parser.parse_args()
     if not args.scheme:
@@ -886,8 +845,8 @@ def main():
         update_results = update_all_repositories(args, config, scheme_name,
                                                  scheme_map, cross_repos_pr)
     fail_count = 0
-    fail_count += check_parallel_results(clone_results, "CLONE")
-    fail_count += check_parallel_results(update_results, "UPDATE")
+    fail_count += ParallelRunner.check_results(clone_results, "CLONE")
+    fail_count += ParallelRunner.check_results(update_results, "UPDATE")
     if fail_count > 0:
         print("update-checkout failed, fix errors and try again")
     else:

From 7909a943aeebe2403f183c4e0fc6b76e8a593591 Mon Sep 17 00:00:00 2001
From: Charles Zablit
Date: Wed, 1 Oct 2025 18:29:36 +0100
Subject: [PATCH 2/2] [update-checkout] refactor argument passing

---
 utils/update_checkout/tests/test_clone.py    |   3 +-
 .../tests/test_locked_repository.py          |  38 ++++--
 .../update_checkout/parallel_runner.py       |  77 +++++++-----
 .../update_checkout/runner_arguments.py      |  33 +++++
 .../update_checkout/update_checkout.py       | 113 ++++++++++--------
 5 files changed, 174 insertions(+), 90 deletions(-)
 create mode 100644 utils/update_checkout/update_checkout/runner_arguments.py

diff --git a/utils/update_checkout/tests/test_clone.py b/utils/update_checkout/tests/test_clone.py
index 0b5c8bbb32f4d..b6785a4f7f503 100644
--- a/utils/update_checkout/tests/test_clone.py
+++ b/utils/update_checkout/tests/test_clone.py
@@ -36,7 +36,8 @@ def test_clone_with_additional_scheme(self):
             '--config', self.additional_config_path,
             '--source-root', self.source_root,
             '--clone',
-            '--scheme', 'extra'])
+            '--scheme', 'extra',
+            '--verbose'])
 
         # Test that we're actually checking out the 'extra' scheme based on the output
         self.assertIn(b"git checkout refs/heads/main", output)
diff --git a/utils/update_checkout/tests/test_locked_repository.py b/utils/update_checkout/tests/test_locked_repository.py
index 28cd71b8d930f..e81166b533c79 100644
--- a/utils/update_checkout/tests/test_locked_repository.py
+++ b/utils/update_checkout/tests/test_locked_repository.py
@@ -1,7 +1,26 @@
 import unittest
 from unittest.mock import patch
 
-from update_checkout.update_checkout import _is_any_repository_locked
+from update_checkout.update_checkout import UpdateArguments, _is_any_repository_locked
+
+
+def _update_arguments_with_fake_path(repo_name: str, path: str) -> UpdateArguments:
+    return UpdateArguments(
+        repo_name=repo_name,
+        source_root=path,
+        config={},
+        scheme_name="",
+        scheme_map=None,
+        tag="",
+        timestamp=None,
+        reset_to_remote=False,
+        clean=False,
+        stash=False,
+        cross_repos_pr=False,
+        output_prefix="",
+        verbose=False,
+    )
+
 
 class TestIsAnyRepositoryLocked(unittest.TestCase):
     @patch("os.path.exists")
@@ -9,8 +28,8 @@ class TestIsAnyRepositoryLocked(unittest.TestCase):
     @patch("os.listdir")
     def test_repository_with_lock_file(self, mock_listdir, mock_isdir, mock_exists):
         pool_args = [
-            ("/fake_path", None, "repo1"),
-            ("/fake_path", None, "repo2"),
+            _update_arguments_with_fake_path("repo1", "/fake_path"),
+            _update_arguments_with_fake_path("repo2", "/fake_path"),
_update_arguments_with_fake_path("repo2", "/fake_path"), ] def listdir_side_effect(path): @@ -32,7 +51,7 @@ def listdir_side_effect(path): @patch("os.listdir") def test_repository_without_git_dir(self, mock_listdir, mock_isdir, mock_exists): pool_args = [ - ("/fake_path", None, "repo1"), + _update_arguments_with_fake_path("repo1", "/fake_path"), ] mock_exists.return_value = False @@ -47,7 +66,7 @@ def test_repository_without_git_dir(self, mock_listdir, mock_isdir, mock_exists) @patch("os.listdir") def test_repository_with_git_file(self, mock_listdir, mock_isdir, mock_exists): pool_args = [ - ("/fake_path", None, "repo1"), + _update_arguments_with_fake_path("repo1", "/fake_path"), ] mock_exists.return_value = True @@ -60,9 +79,11 @@ def test_repository_with_git_file(self, mock_listdir, mock_isdir, mock_exists): @patch("os.path.exists") @patch("os.path.isdir") @patch("os.listdir") - def test_repository_with_multiple_lock_files(self, mock_listdir, mock_isdir, mock_exists): + def test_repository_with_multiple_lock_files( + self, mock_listdir, mock_isdir, mock_exists + ): pool_args = [ - ("/fake_path", None, "repo1"), + _update_arguments_with_fake_path("repo1", "/fake_path"), ] mock_exists.return_value = True @@ -77,7 +98,7 @@ def test_repository_with_multiple_lock_files(self, mock_listdir, mock_isdir, moc @patch("os.listdir") def test_repository_with_no_lock_files(self, mock_listdir, mock_isdir, mock_exists): pool_args = [ - ("/fake_path", None, "repo1"), + _update_arguments_with_fake_path("repo1", "/fake_path"), ] mock_exists.return_value = True @@ -86,4 +107,3 @@ def test_repository_with_no_lock_files(self, mock_listdir, mock_isdir, mock_exis result = _is_any_repository_locked(pool_args) self.assertEqual(result, set()) - diff --git a/utils/update_checkout/update_checkout/parallel_runner.py b/utils/update_checkout/update_checkout/parallel_runner.py index 4b0810a205da2..fbbe7bd060e7b 100644 --- a/utils/update_checkout/update_checkout/parallel_runner.py +++ b/utils/update_checkout/update_checkout/parallel_runner.py @@ -2,59 +2,79 @@ import sys from multiprocessing import Pool, cpu_count, Manager import time -from typing import Callable, List, Any -from threading import Thread, Event, Lock +from typing import Callable, List, Any, Union +from threading import Lock, Thread, Event import shutil +from .runner_arguments import RunnerArguments, AdditionalSwiftSourcesArguments + + class MonitoredFunction: - def __init__(self, fn: Callable, running_tasks: ListProxy, updated_repos: ValueProxy, lock: Lock): + def __init__( + self, + fn: Callable, + running_tasks: ListProxy, + updated_repos: ValueProxy, + lock: Lock + ): self.fn = fn self.running_tasks = running_tasks self.updated_repos = updated_repos self._lock = lock - def __call__(self, *args): - task_name = args[0][2] + def __call__(self, *args: Union[RunnerArguments, AdditionalSwiftSourcesArguments]): + task_name = args[0].repo_name self.running_tasks.append(task_name) + result = None try: - return self.fn(*args) + result = self.fn(*args) + except Exception as e: + print(e) finally: self._lock.acquire() - self.running_tasks.remove(task_name) + if task_name in self.running_tasks: + self.running_tasks.remove(task_name) self.updated_repos.set(self.updated_repos.get() + 1) self._lock.release() + return result class ParallelRunner: - def __init__(self, fn: Callable, pool_args: List[List[Any]], n_processes: int = 0): + def __init__( + self, + fn: Callable, + pool_args: List[Union[RunnerArguments, AdditionalSwiftSourcesArguments]], + n_processes: int = 0, + 
         self._monitor_polling_period = 0.1
         if n_processes == 0:
             n_processes = cpu_count() * 2
         self._terminal_width = shutil.get_terminal_size().columns
         self._n_processes = n_processes
         self._pool_args = pool_args
+        manager = Manager()
+        self._lock = manager.Lock()
+        self._running_tasks = manager.list()
+        self._updated_repos = manager.Value("i", 0)
         self._fn = fn
-        self._lock = Manager().Lock()
-        self._pool = Pool(
-            processes=self._n_processes, initializer=self._child_init, initargs=(self._lock,)
-        )
-        self._verbose = pool_args[0][len(pool_args[0]) - 1]
+        self._pool = Pool(processes=self._n_processes)
+        self._verbose = pool_args[0].verbose
+        self._output_prefix = pool_args[0].output_prefix
         self._nb_repos = len(pool_args)
         self._stop_event = Event()
-        self._running_tasks = Manager().list()
-        self._updated_repos = Manager().Value('i', 0)
-        self._monitored_fn = MonitoredFunction(self._fn, self._running_tasks, self._updated_repos, self._lock)
+        self._monitored_fn = MonitoredFunction(
+            self._fn, self._running_tasks, self._updated_repos, self._lock
+        )
 
     def run(self) -> List[Any]:
         print(
             "Running ``%s`` with up to %d processes."
             % (self._fn.__name__, self._n_processes)
         )
-
         if self._verbose:
             results = self._pool.map_async(
                 func=self._fn, iterable=self._pool_args
-            ).get()
+            ).get(timeout=1800)
             self._pool.close()
             self._pool.join()
         else:
@@ -62,7 +82,7 @@ def run(self) -> List[Any]:
             monitor_thread.start()
             results = self._pool.map_async(
                 func=self._monitored_fn, iterable=self._pool_args
-            ).get()
+            ).get(timeout=1800)
             self._pool.close()
             self._pool.join()
             self._stop_event.set()
@@ -72,11 +92,14 @@ def run(self) -> List[Any]:
     def _monitor(self):
         last_output = ""
         while not self._stop_event.is_set():
+            self._lock.acquire()
             current = list(self._running_tasks)
             current_line = ", ".join(current)
+            updated_repos = self._updated_repos.get()
+            self._lock.release()
 
             if current_line != last_output:
-                truncated = f"Updating [{self._updated_repos.get()}/{self._nb_repos}] ({current_line})"
+                truncated = f"{self._output_prefix} [{updated_repos}/{self._nb_repos}] ({current_line})"
                 if len(truncated) > self._terminal_width:
                     ellipsis_marker = " ..."
                     truncated = (
@@ -89,17 +112,11 @@ def _monitor(self):
 
             time.sleep(self._monitor_polling_period)
 
-        sys.stdout.write("\r" + " " * len(last_output) + "\r")
+        sys.stdout.write("\r" + " " * self._terminal_width + "\r\n")
         sys.stdout.flush()
 
     @staticmethod
-    def _clear_lines(n):
-        for _ in range(n):
-            sys.stdout.write("\x1b[1A")
-            sys.stdout.write("\x1b[2K")
-
-    @staticmethod
-    def check_results(results, op):
+    def check_results(results, op) -> int:
         """Function used to check the results of ParallelRunner.
 
        NOTE: This function was originally located in the shell module of
         swift_build_support and should eventually be replaced with a better
         parallel implementation.
         """
@@ -123,7 +140,3 @@ def check_results(results, op) -> int:
                     print(r.stderr.decode())
         return fail_count
 
-    @staticmethod
-    def _child_init(lck):
-        global lock
-        lock = lck
diff --git a/utils/update_checkout/update_checkout/runner_arguments.py b/utils/update_checkout/update_checkout/runner_arguments.py
new file mode 100644
index 0000000000000..6d542c838336c
--- /dev/null
+++ b/utils/update_checkout/update_checkout/runner_arguments.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+from typing import Any, Dict
+
+@dataclass
+class RunnerArguments:
+    repo_name: str
+    scheme_name: str
+    output_prefix: str
+    verbose: bool
+
+@dataclass
+class UpdateArguments(RunnerArguments):
+    source_root: str
+    config: Dict[str, Any]
+    scheme_map: Any
+    tag: str
+    timestamp: Any
+    reset_to_remote: bool
+    clean: bool
+    stash: bool
+    cross_repos_pr: Any
+
+@dataclass
+class AdditionalSwiftSourcesArguments(RunnerArguments):
+    args: Any  # The argparse.Namespace parsed in main().
+    repo_info: str
+    repo_branch: str
+    remote: str
+    with_ssh: bool
+    skip_history: bool
+    skip_tags: bool
+    skip_repository_list: bool
+    use_submodules: bool
diff --git a/utils/update_checkout/update_checkout/update_checkout.py b/utils/update_checkout/update_checkout/update_checkout.py
index 25fd5cdbb812c..38fcbfc04c173 100755
--- a/utils/update_checkout/update_checkout/update_checkout.py
+++ b/utils/update_checkout/update_checkout/update_checkout.py
@@ -16,10 +16,10 @@
 import sys
 import traceback
 from multiprocessing import freeze_support
-from typing import Optional, Set, List, Any
+from typing import Any, Dict, Optional, Set, List
 
 from build_swift.build_swift.constants import SWIFT_SOURCE_ROOT
-
+from .runner_arguments import AdditionalSwiftSourcesArguments, UpdateArguments
 from .parallel_runner import ParallelRunner
 
 from swift_build_support.swift_build_support import shell
@@ -27,9 +27,7 @@
 SCRIPT_FILE = os.path.abspath(__file__)
 SCRIPT_DIR = os.path.dirname(SCRIPT_FILE)
 
-
-def confirm_tag_in_repo(tag, repo_name) -> Optional[str]:
-    # type: (str, str) -> str | None
+def confirm_tag_in_repo(tag: str, repo_name: str) -> Optional[str]:
     """Confirm that a given tag exists in a git repository. This function
     assumes that the repository is already a current working directory before
     it's called.
@@ -52,7 +50,7 @@ def confirm_tag_in_repo(tag: str, repo_name: str) -> Optional[str]:
     return tag
 
 
-def find_rev_by_timestamp(timestamp, repo_name, refspec):
+def find_rev_by_timestamp(timestamp: str, repo_name: str, refspec: str) -> str:
     refspec_exists = True
     try:
         shell.run(["git", "rev-parse", "--verify", refspec])
@@ -107,12 +105,11 @@ def get_branch_for_repo(config, repo_name, scheme_name, scheme_map,
     return repo_branch, cross_repo
 
 
-def update_single_repository(pool_args):
-    source_root, config, repo_name, scheme_name, scheme_map, tag, timestamp, \
-        reset_to_remote, should_clean, should_stash, cross_repos_pr, verbose = \
-        pool_args
+def update_single_repository(pool_args: UpdateArguments):
+    verbose = pool_args.verbose
+    repo_name = pool_args.repo_name
 
-    repo_path = os.path.join(source_root, repo_name)
+    repo_path = os.path.join(pool_args.source_root, repo_name)
     if not os.path.isdir(repo_path) or os.path.islink(repo_path):
         return
 
@@ -124,13 +121,13 @@ def update_single_repository(pool_args):
         with shell.pushd(repo_path, dry_run=False, echo=verbose):
             cross_repo = False
             checkout_target = None
-            if tag:
-                checkout_target = confirm_tag_in_repo(tag, repo_name)
-            elif scheme_name:
+            if pool_args.tag:
+                checkout_target = confirm_tag_in_repo(pool_args.tag, repo_name)
+            elif pool_args.scheme_name:
                 checkout_target, cross_repo = get_branch_for_repo(
-                    config, repo_name, scheme_name, scheme_map, cross_repos_pr)
-            if timestamp:
-                checkout_target = find_rev_by_timestamp(timestamp,
+                    pool_args.config, repo_name, pool_args.scheme_name, pool_args.scheme_map, pool_args.cross_repos_pr)
+            if pool_args.timestamp:
+                checkout_target = find_rev_by_timestamp(pool_args.timestamp,
                                                         repo_name,
                                                         checkout_target)
 
@@ -139,7 +136,7 @@ def update_single_repository(pool_args):
             # changes rather than discarding them)
             # 2. delete ignored files
             # 3. abort an ongoing rebase
-            if should_clean or should_stash:
+            if pool_args.clean or pool_args.stash:
 
                 def run_for_repo_and_each_submodule_rec(cmd):
                     shell.run(cmd, echo=verbose, prefix=prefix)
@@ -149,10 +146,10 @@ def run_for_repo_and_each_submodule_rec(cmd):
                         prefix=prefix,
                     )
 
-                if should_stash:
+                if pool_args.stash:
                     # Stash tracked and untracked changes.
                     run_for_repo_and_each_submodule_rec(["git", "stash", "-u"])
-                elif should_clean:
+                elif pool_args.clean:
                     # Delete tracked changes.
                     run_for_repo_and_each_submodule_rec(
                         ["git", "reset", "--hard", "HEAD"]
@@ -204,7 +201,7 @@ def run_for_repo_and_each_submodule_rec(cmd):
 
             # If we were asked to reset to the specified branch, do the hard
             # reset and return.
-            if checkout_target and reset_to_remote and not cross_repo:
+            if checkout_target and pool_args.reset_to_remote and not cross_repo:
                 full_target = full_target_name('origin', checkout_target)
                 shell.run(['git', 'reset', '--hard', full_target],
                           echo=verbose, prefix=prefix)
@@ -294,7 +291,7 @@ def get_scheme_map(config, scheme_name):
     return None
 
 
-def _is_any_repository_locked(pool_args: List[Any]) -> Set[str]:
+def _is_any_repository_locked(pool_args: List[UpdateArguments]) -> Set[str]:
    """Returns the set of locked repositories.
 
     A repository is considered to be locked if its .git directory contains a
@@ -308,7 +305,7 @@ def _is_any_repository_locked(pool_args: List[UpdateArguments]) -> Set[str]:
         Set[str]: The names of the locked repositories if any.
""" - repos = [(x[0], x[2]) for x in pool_args] + repos = [(x.source_root, x.repo_name) for x in pool_args] locked_repositories = set() for source_root, repo_name in repos: dot_git_path = os.path.join(source_root, repo_name, ".git") @@ -340,17 +337,21 @@ def update_all_repositories(args, config, scheme_name, scheme_map, cross_repos_p ) continue - my_args = [args.source_root, config, - repo_name, - scheme_name, - scheme_map, - args.tag, - timestamp, - args.reset_to_remote, - args.clean, - args.stash, - cross_repos_pr, - args.verbose] + my_args = UpdateArguments( + source_root=args.source_root, + config=config, + repo_name=repo_name, + scheme_name=scheme_name, + scheme_map=scheme_map, + tag=args.tag, + timestamp=timestamp, + reset_to_remote=args.reset_to_remote, + clean=args.clean, + stash=args.stash, + cross_repos_pr=cross_repos_pr, + output_prefix="Updating", + verbose=args.verbose, + ) pool_args.append(my_args) locked_repositories: set[str] = _is_any_repository_locked(pool_args) @@ -362,24 +363,29 @@ def update_all_repositories(args, config, scheme_name, scheme_map, cross_repos_p return ParallelRunner(update_single_repository, pool_args, args.n_processes).run() -def obtain_additional_swift_sources(pool_args): - (args, repo_name, repo_info, repo_branch, remote, with_ssh, scheme_name, - skip_history, skip_tags, skip_repository_list, use_submodules, verbose) = pool_args +def obtain_additional_swift_sources(pool_args: AdditionalSwiftSourcesArguments): + args = pool_args.args + repo_name = pool_args.repo_name + repo_branch = pool_args.repo_branch + verbose = pool_args.verbose + skip_tags = pool_args.skip_tags + remote = pool_args.remote env = dict(os.environ) env.update({'GIT_TERMINAL_PROMPT': '0'}) with shell.pushd(args.source_root, dry_run=False, echo=False): - print("Cloning '" + repo_name + "'") + if verbose: + print("Cloning '" + pool_args.repo_name + "'") - if skip_history: + if pool_args.skip_history: shell.run(['git', 'clone', '--recursive', '--depth', '1', '--branch', repo_branch, remote, repo_name] + (['--no-tags'] if skip_tags else []), env=env, echo=verbose) - elif use_submodules: + elif pool_args.use_submodules: shell.run(['git', 'submodule', 'add', remote, repo_name] + (['--no-tags'] if skip_tags else []), env=env, @@ -389,7 +395,7 @@ def obtain_additional_swift_sources(pool_args): (['--no-tags'] if skip_tags else []), env=env, echo=verbose) - if scheme_name: + if pool_args.scheme_name: src_path = os.path.join(args.source_root, repo_name, ".git") shell.run(['git', '--git-dir', src_path, '--work-tree', @@ -464,15 +470,26 @@ def obtain_all_additional_swift_sources(args, config, with_ssh, scheme_name, if repo_not_in_scheme: continue - - new_args = [args, repo_name, repo_info, repo_branch, remote, - with_ssh, scheme_name, skip_history, skip_tags, - skip_repository_list, use_submodules, args.verbose] + new_args = AdditionalSwiftSourcesArguments( + args=args, + repo_name=repo_name, + repo_info=repo_info, + repo_branch=repo_branch, + remote=remote, + with_ssh=with_ssh, + scheme_name=scheme_name, + skip_history=skip_history, + skip_tags=skip_tags, + skip_repository_list=skip_repository_list, + use_submodules=use_submodules, + output_prefix="Cloning", + verbose=args.verbose, + ) if use_submodules: - obtain_additional_swift_sources(new_args) + obtain_additional_swift_sources(new_args) else: - pool_args.append(new_args) + pool_args.append(new_args) # Only use `ParallelRunner` when submodules are not used, since `.git` dir # can't be accessed concurrently. 
@@ -558,7 +575,7 @@ def merge_config(config: dict, new_config: dict) -> dict: return result -def validate_config(config): +def validate_config(config: Dict[str, Any]): # Make sure that our branch-names are unique. scheme_names = config['branch-schemes'].keys() if len(scheme_names) != len(set(scheme_names)):
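
Reviewer note: a minimal sketch of how the refactored pieces compose after this series, assuming the module layout the patches introduce (`update_checkout.runner_arguments`, `update_checkout.parallel_runner`). The repository names, source root, and empty config below are illustrative placeholders, not values the script itself constructs; the real caller is `update_all_repositories()`, which fills `UpdateArguments` from the parsed command line and config file.

# sketch.py -- illustrative only; run from a tree where the update_checkout
# package and its build_swift/swift_build_support dependencies are importable.
from update_checkout.runner_arguments import UpdateArguments
from update_checkout.parallel_runner import ParallelRunner
from update_checkout.update_checkout import update_single_repository

# One UpdateArguments per repository; every value here is a placeholder.
pool_args = [
    UpdateArguments(
        repo_name=name,
        scheme_name="main",
        output_prefix="Updating",  # label shown by the progress monitor
        verbose=False,             # False selects the single-line progress UI
        source_root="/path/to/swift-project",
        config={},                 # normally the parsed update-checkout config
        scheme_map=None,
        tag="",
        timestamp=None,
        reset_to_remote=False,
        clean=False,
        stash=False,
        cross_repos_pr={},
    )
    for name in ("swift", "llvm-project")
]

# n_processes=0 (the default) lets the runner pick cpu_count() * 2 workers.
results = ParallelRunner(update_single_repository, pool_args).run()
fail_count = ParallelRunner.check_results(results, "UPDATE")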