Skip to content

Commit

Permalink
Merge pull request #4276 from Martchus/scheduler-test
Browse files Browse the repository at this point in the history
Fix race-condition in `t/05-scheduler-full.t` which can lead to failures
  • Loading branch information
mergify[bot] committed Oct 6, 2021
2 parents b46be48 + ef62392 commit f9fc4d9
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 23 deletions.
32 changes: 15 additions & 17 deletions t/05-scheduler-full.t
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use Time::HiRes 'sleep';
use FindBin;
use lib "$FindBin::Bin/lib", "$FindBin::Bin/../external/os-autoinst-common/lib";
use OpenQA::Constants qw(DEFAULT_WORKER_TIMEOUT DB_TIMESTAMP_ACCURACY);
use OpenQA::Jobs::Constants;
use OpenQA::Scheduler::Client;
use OpenQA::Scheduler::Model::Jobs;
use OpenQA::Worker::WebUIConnection;
Expand All @@ -45,7 +46,7 @@ use OpenQA::Test::Utils qw(
create_webapi setup_share_dir create_websocket_server
stop_service unstable_worker
unresponsive_worker broken_worker rejective_worker
wait_for_or_bail_out
wait_for
);
use OpenQA::Test::TimeLimit '150';

Expand Down Expand Up @@ -176,11 +177,11 @@ subtest 're-scheduling and incompletion of jobs when worker rejects jobs or goes
my $job_scheduled = 0;
for (0 .. 100) {
my $job_state = $jobs->find(99982)->state;
if ($job_state eq OpenQA::Jobs::Constants::ASSIGNED) {
if ($job_state eq ASSIGNED) {
note 'job is assigned' unless $job_assigned; # uncoverable statement
$job_assigned = 1; # uncoverable statement
}
elsif ($job_state eq OpenQA::Jobs::Constants::SCHEDULED) {
elsif ($job_state eq SCHEDULED) {
$job_scheduled = 1;
last;
}
Expand All @@ -204,20 +205,17 @@ subtest 're-scheduling and incompletion of jobs when worker rejects jobs or goes
and is @{$allocated}[0]->{worker}, 5, 'job allocated to expected worker';

# kill the worker but assume the job has been actually started and is running
# note: Also setting back assigned_worker_id and result because the worker might have actually picked up the job.
stop_workers;
$jobs->find(99982)->update({state => OpenQA::Jobs::Constants::RUNNING});
$jobs->find(99982)->update({assigned_worker_id => 5, state => RUNNING, result => NONE});

@workers = unstable_worker(@$worker_settings, 3, -1);
wait_for_worker($schema, 5);

note 'waiting for job to be incompleted';
wait_for_or_bail_out { $jobs->find(99982)->state eq OpenQA::Jobs::Constants::DONE } 'Job 99982 is done';
wait_for { $jobs->find(99982)->state eq DONE } 'job 99982 is incompleted', {timeout => 20};

my $job = $jobs->find(99982);
is $job->state, OpenQA::Jobs::Constants::DONE,
'running job set to done if its worker re-connects claiming not to work on it anymore';
is $job->result, OpenQA::Jobs::Constants::INCOMPLETE,
'running job incompleted if its worker re-connects claiming not to work on it anymore';
is $job->state, DONE, 'running job set to done if its worker re-connects claiming not to work on it anymore';
is $job->result, INCOMPLETE, 'running job incompleted if its worker re-connects claiming not to work on it anymore';
like $job->reason, qr/abandoned: associated worker .+:\d+ re-connected but abandoned the job/, 'reason is set';

stop_workers;
Expand All @@ -239,18 +237,18 @@ subtest 'Simulation of heavy unstable load' => sub {
my %jobs;
my %w;
foreach my $j (@$allocated) {
ok !$jobs{$j->{job}}, "Job (" . $j->{job} . ") allocated only once";
ok !$w{$j->{worker}}, "Worker (" . $j->{worker} . ") used only once";
ok !$jobs{$j->{job}}, "Job ($j->{job}) allocated only once";
ok !$w{$j->{worker}}, "Worker ($j->{worker}) used only once";
$w{$j->{worker}}++;
$jobs{$j->{job}}++;
}

for my $dup (@duplicated) {
for (0 .. 2000) {
last if $dup->state eq OpenQA::Jobs::Constants::SCHEDULED;
last if $dup->state eq SCHEDULED;
sleep .1; # uncoverable statement
}
is $dup->state, OpenQA::Jobs::Constants::SCHEDULED, "Job(" . $dup->id . ") back in scheduled state";
is $dup->state, SCHEDULED, "Job(" . $dup->id . ") back in scheduled state";
}
stop_workers;
dead_workers($schema);
Expand All @@ -264,10 +262,10 @@ subtest 'Simulation of heavy unstable load' => sub {
is @$allocated, 0, 'All failed allocation on second step - workers were killed';
for my $dup (@duplicated) {
for (0 .. 2000) {
last if $dup->state eq OpenQA::Jobs::Constants::SCHEDULED;
last if $dup->state eq SCHEDULED;
sleep .1; # uncoverable statement
}
is $dup->state, OpenQA::Jobs::Constants::SCHEDULED, "Job(" . $dup->id . ") is still in scheduled state";
is $dup->state, SCHEDULED, "Job(" . $dup->id . ") is still in scheduled state";
}

stop_workers;
Expand Down
17 changes: 11 additions & 6 deletions t/lib/OpenQA/Test/Utils.pm
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ our (@EXPORT, @EXPORT_OK);
qw(unresponsive_worker broken_worker rejective_worker setup_share_dir setup_fullstack_temp_dir run_gru_job),
qw(stop_service start_worker unstable_worker fake_asset_server),
qw(cache_minion_worker cache_worker_service shared_hash embed_server_for_testing),
qw(run_cmd test_cmd wait_for_or_bail_out perform_minion_jobs),
qw(run_cmd test_cmd wait_for wait_for_or_bail_out perform_minion_jobs),
qw(prepare_clean_needles_dir prepare_default_needle mock_io_loop assume_all_assets_exist)
);

Expand Down Expand Up @@ -607,17 +607,22 @@ sub test_cmd {
return $ret;
}

sub wait_for_or_bail_out : prototype(&*;*) { # `&*;*` allows calling it like `wait_for_or_bail_out { 1 } 'foo'`
sub wait_for : prototype(&*;*) { # `&*;*` allows calling it like `wait_for { 1 } 'foo'`
my ($function, $description, $args) = @_;
my $timeout = $args->{timeout} // 60;
my $interval = $args->{interval} // .1;

note "Waiting for $description to become available";
note "Waiting for '$description' to become available";
while ($timeout > 0) {
return if $function->();
$timeout -= sleep $interval; # uncoverable statement function might return early one line up
return 1 if $function->();
$timeout -= sleep $interval; # uncoverable statement (function might return early one line up)
}
BAIL_OUT "$description not available";
return 0; # uncoverable statement (only invoked if tests would fail)
}

sub wait_for_or_bail_out : prototype(&*;*) { # `&*;*` allows calling it like `wait_for_or_bail_out { 1 } 'foo'`
my ($function, $description, $args) = @_;
wait_for \&$function, $description, $args or BAIL_OUT "'$description' not available";
}

sub prepare_clean_needles_dir ($dir = 't/data/openqa/share/tests/opensuse/needles') {
Expand Down

0 comments on commit f9fc4d9

Please sign in to comment.