Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
worker: Allow cancellation of jobs while waiting for asset cache
* Keep processing events while waiting for the asset cache to allow
  receiving a job cancellation request
* See https://progress.opensuse.org/issues/91764
  • Loading branch information
Martchus committed May 11, 2021
1 parent cacaccb commit 734a8f6
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 11 deletions.
36 changes: 29 additions & 7 deletions lib/OpenQA/Worker.pm
Expand Up @@ -307,8 +307,7 @@ sub init {
# kill if stopping gracefully does not work
log_error('Another error occurred when trying to stop gracefully due to an error. '
. 'Trying to kill ourself forcefully now.');
$self->kill();
Mojo::IOLoop->stop();
$self->kill;
});


Expand Down Expand Up @@ -381,13 +380,37 @@ sub configure_cache_client {
$client->ua->inactivity_timeout($ENV{OPENQA_WORKER_CACHE_SERVICE_CHECK_INACTIVITY_TIMEOUT} // 10);
}

# "sleeps" for the specified number of seconds while actually running the worker's event loop started via exec() to
# keep processing events (like job cancellation)
sub delay ($self, $delay) {
    # Parameters:
    #   $delay - number of seconds to wait, passed on as-is to the timer of the event loop
    #
    # This function is supposed to be called from within the event loop started via exec().

    my $ioloop = Mojo::IOLoop->singleton;

    # stop the loop if it is currently running so it can be started again below
    $ioloop->stop if $ioloop->is_running;

    # tell exec() to resume running the loop (instead of returning) once it notices the stop
    $self->{_resume_loop} = 1;

    # run the loop until the timer fires or something else stops it; remove the timer afterwards
    # so it cannot fire later in case the loop has been stopped before the delay elapsed
    my $timer_id = $ioloop->timer($delay => sub { $ioloop->stop });
    $ioloop->start;
    $ioloop->remove($timer_id);
}

# Stops the event loop for good: besides stopping the loop, the `_resume_loop` flag is cleared so
# exec() does not start the loop again. Evaluates to undef (the value assigned to the flag).
sub stop_event_loop ($self) {
    Mojo::IOLoop->singleton->stop;
    return $self->{_resume_loop} = undef;
}

# Initializes the worker and runs the event loop until it is stopped for good.
#
# Returns the return code of init().
sub exec {
    my ($self) = @_;

    my $return_code = $self->init;

    # run the event loop - each start() call blocks until the loop is stopped
    # notes: - The loop is started again as long as the `_resume_loop` flag is set when start()
    #          returns. delay() sets that flag before it stops the loop; stop_event_loop() clears it.
    #        - The flag is reset before every start() so a stale value cannot keep the loop running.
    #        - There must be no unconditional start() call in front of this loop; otherwise a final
    #          stop would be followed by another start() and the worker would not terminate.
    my $loop = Mojo::IOLoop->singleton;
    do {
        $self->{_resume_loop} = undef;
        $loop->start;
    } while ($self->{_resume_loop});

    return $return_code;
}
Expand Down Expand Up @@ -584,7 +607,7 @@ sub stop {

# stop immediately if there is currently no job
my $current_job = $self->current_job;
return $self->_inform_webuis_before_stopping(sub { Mojo::IOLoop->stop; }) unless defined $current_job;
return $self->_inform_webuis_before_stopping(sub { $self->stop_event_loop }) unless defined $current_job;
return undef if $self->{_finishing_off};

# stop job directly during setup because the IO loop is blocked by isotovideo.pm during setup
Expand All @@ -604,7 +627,7 @@ sub kill {
my ($self) = @_;

if (my $current_job = $self->current_job) { $current_job->kill; }
Mojo::IOLoop->stop;
$self->stop_event_loop;
}

sub is_stopping {
Expand Down Expand Up @@ -690,8 +713,7 @@ sub _handle_client_status_changed {
}
if (!defined $self->current_job) {
log_error('Stopping because registration with all configured web UI hosts failed');
Mojo::IOLoop->stop;
return undef;
return $self->stop_event_loop;
}

# continue executing the current job even though the registration is not possible anymore; it
Expand Down
7 changes: 4 additions & 3 deletions lib/OpenQA/Worker/Engines/isotovideo.pm
Expand Up @@ -106,11 +106,12 @@ sub detect_asset_keys {
return \%res;
}

sub _poll_cache_service ($job, $cache_client, $request, $status_ref) {
sub _poll_cache_service ($job, $cache_client, $request, $status_ref, $delay = 5) {
until ($$status_ref->is_processed) {
sleep 5;
$job->worker->delay($delay);
return {error => 'Status updates interrupted'} unless $job->post_setup_status;
return {error => $$status_ref->error} if $$status_ref->has_error;
return {error => 'Job has been cancelled'} if $job->is_stopped_or_stopping;
return {error => $$status_ref->error} if $$status_ref->has_error;
$$status_ref = $cache_client->status($request);
}
return undef;
Expand Down
3 changes: 2 additions & 1 deletion lib/OpenQA/Worker/Job.pm
Expand Up @@ -258,7 +258,8 @@ sub start {
# let the IO loop take over if the job has been stopped during setup
# notes: - Stop has already been called at this point and async code for stopping is setup to run
# on the event loop.
# - This can happen if stop is called from an interrupt.
# - This can happen if stop is called from an interrupt or the job has been cancelled by the
# web UI.
return undef if $self->is_stopped_or_stopping;

log_error("Unable to setup job $id: $setup_error");
Expand Down

0 comments on commit 734a8f6

Please sign in to comment.