Skip to content
This repository has been archived by the owner on Oct 13, 2023. It is now read-only.

Commit

Permalink
Add timeouts to oc image mirror
Browse files Browse the repository at this point in the history
We observed an issue that `oc image mirror` had stuck for 2 hours.

Add a timeout of 30 minutes to each `oc image mirror` invocation.

See https://coreos.slack.com/archives/GDBRP5YJH/p1659501833732749.
  • Loading branch information
vfreex authored and sosiouxme committed Aug 4, 2022
1 parent 5cf3cc0 commit 5a40d8c
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 9 deletions.
2 changes: 1 addition & 1 deletion doozerlib/cli/images_streams.py
Expand Up @@ -117,7 +117,7 @@ def images_streams_mirror(runtime, streams, only_if_missing, live_test_mode, dry
if dry_run:
print(f'For {upstream_entry_name}, would have run: {arm_cmd}')
else:
exectools.cmd_assert(arm_cmd, retries=3, realtime=True)
exectools.cmd_assert(arm_cmd, retries=3, realtime=True, timeout=1800)


@images_streams.command('check-upstream', short_help='Dumps information about CI buildconfigs/mirrored images associated with this group.')
Expand Down
2 changes: 1 addition & 1 deletion doozerlib/cli/release_gen_payload.py
Expand Up @@ -425,7 +425,7 @@ def add_image_to_mirror(src_pullspec, dest_pullspec):

if apply or apply_multi_arch:
logger.info(f'Mirroring images from {str(src_dest_path)}')
exectools.cmd_assert(f'oc image mirror --keep-manifest-list --filename={str(src_dest_path)}', retries=3)
exectools.cmd_assert(f'oc image mirror --keep-manifest-list --filename={str(src_dest_path)}', retries=3, timeout=1800)

for private_mode in privacy_modes:
logger.info(f'Building payload files for architecture: {arch}; private: {private_mode}')
Expand Down
2 changes: 1 addition & 1 deletion doozerlib/distgit.py
Expand Up @@ -852,7 +852,7 @@ def push_image(self, tag_list, push_to_defaults, additional_registries=[], versi
else:
for r in range(10):
self.logger.info("Mirroring image [retry={}]".format(r))
rc, out, err = exectools.cmd_gather(mirror_cmd)
rc, out, err = exectools.cmd_gather(mirror_cmd, timeout=1800)
if rc == 0:
break
self.logger.info("Error mirroring image -- retrying in 60 seconds.\n{}".format(err))
Expand Down
14 changes: 10 additions & 4 deletions doozerlib/exectools.py
Expand Up @@ -74,7 +74,7 @@ def urlopen_assert(url_or_req, httpcode=200, retries=3):


def cmd_assert(cmd, realtime=False, retries=1, pollrate=60, on_retry=None,
set_env=None, strip=False, log_stdout: bool = False, log_stderr: bool = True) -> Tuple[str, str]:
set_env=None, strip=False, log_stdout: bool = False, log_stderr: bool = True, timeout: Optional[int] = None) -> Tuple[str, str]:
"""
Run a command, logging (using exec_cmd) and raise an exception if the
return code of the command indicates failure.
Expand All @@ -89,6 +89,7 @@ def cmd_assert(cmd, realtime=False, retries=1, pollrate=60, on_retry=None,
:param strip: Strip extra whitespace from stdout/err before returning.
:param log_stdout: Whether stdout should be logged into the DEBUG log.
:param log_stderr: Whether stderr should be logged into the DEBUG log
:param timeout: Kill the process if it does not terminate after timeout seconds.
:return: (stdout,stderr) if exit code is zero
"""

Expand All @@ -103,7 +104,7 @@ def cmd_assert(cmd, realtime=False, retries=1, pollrate=60, on_retry=None,
cmd_gather(on_retry, set_env)

result, stdout, stderr = cmd_gather(cmd, set_env=set_env, realtime=realtime, strip=strip,
log_stdout=log_stdout, log_stderr=log_stderr)
log_stdout=log_stdout, log_stderr=log_stderr, timeout=timeout)
if result == SUCCESS:
break

Expand All @@ -121,7 +122,7 @@ def cmd_assert(cmd, realtime=False, retries=1, pollrate=60, on_retry=None,
cmd_counter = 0 # Increments atomically to help search logs for command start/stop


def cmd_gather(cmd: Union[str, List], set_env: Optional[Dict[str, str]] = None, realtime=False, strip=False, log_stdout=False, log_stderr=True) -> Tuple[int, str, str]:
def cmd_gather(cmd: Union[str, List], set_env: Optional[Dict[str, str]] = None, realtime=False, strip=False, log_stdout=False, log_stderr=True, timeout: Optional[int] = None) -> Tuple[int, str, str]:
"""
Runs a command and returns rc,stdout,stderr as a tuple.
Expand All @@ -135,6 +136,7 @@ def cmd_gather(cmd: Union[str, List], set_env: Optional[Dict[str, str]] = None,
:param strip: Strip extra whitespace from stdout/err before returning.
:param log_stdout: Whether stdout should be logged into the DEBUG log.
:param log_stderr: Whether stderr should be logged into the DEBUG log
:param timeout: Kill the process if it does not terminate after timeout seconds.
:return: (rc,stdout,stderr)
"""
global cmd_counter, cmd_counter_lock
Expand Down Expand Up @@ -173,7 +175,11 @@ def cmd_gather(cmd: Union[str, List], set_env: Optional[Dict[str, str]] = None,
return exc.errno, "", description

if not realtime:
out, err = proc.communicate()
try:
out, err = proc.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
proc.kill()
out, err = proc.communicate()
rc = proc.returncode
else:
out = b''
Expand Down
4 changes: 2 additions & 2 deletions tests/test_distgit/test_image_distgit/test_push_image.py
Expand Up @@ -501,7 +501,7 @@ def test_push_image_insecure_source(self):

(flexmock(distgit.exectools)
.should_receive("cmd_gather")
.with_args(expected_cmd)
.with_args(expected_cmd, timeout=1800)
.once()
.and_return((0, "", "")))

Expand Down Expand Up @@ -547,7 +547,7 @@ def test_push_image_registry_config(self):

(flexmock(distgit.exectools)
.should_receive("cmd_gather")
.with_args(expected_cmd)
.with_args(expected_cmd, timeout=1800)
.once()
.and_return((0, "", "")))

Expand Down

0 comments on commit 5a40d8c

Please sign in to comment.