Skip to content

Commit

Permalink
Improve test for re-scheduling and marking jobs as incomplete
Browse files Browse the repository at this point in the history
* Reduce the execution time by several seconds by reducing/mocking
  the status update interval and some sleeps
* Add further notes
* Make the unstable workers less unstable for the sake of this
  test. There's still the 'Simulation of heavy unstable load' subtest.
  • Loading branch information
Martchus committed Dec 16, 2019
1 parent a2c0469 commit ca5a73b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 35 deletions.
62 changes: 39 additions & 23 deletions t/05-scheduler-full.t
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ BEGIN {
use lib "$FindBin::Bin/lib";
use OpenQA::Scheduler::Client;
use OpenQA::Scheduler::Model::Jobs;
use OpenQA::Worker::WebUIConnection;
use OpenQA::Utils;
use OpenQA::Test::Database;
use Test::More;
Expand All @@ -54,7 +55,7 @@ use OpenQA::Test::Utils qw(
use Mojolicious;
use File::Path qw(make_path remove_tree);
use DateTime;
# This test have to be treated like fullstack.
# This test has to be treated like fullstack.
plan skip_all => "set SCHEDULER_FULLSTACK=1 (be careful)" unless $ENV{SCHEDULER_FULLSTACK};

init_db();
Expand Down Expand Up @@ -111,70 +112,85 @@ subtest 'Scheduler worker job allocation' => sub {
($allocated) = scheduler_step();
is @$allocated, 0;

dead_workers($schema);

kill_service($_, 1) for ($w1_pid, $w2_pid);

dead_workers($schema);
};

subtest 'Simulation of unstable workers' => sub {
my @latest = $schema->resultset("Jobs")->latest_jobs;
subtest 're-scheduling and incompletion of jobs when worker is unresponsive or crashes completely' => sub {
# avoid wasting time waiting for status updates
my $web_ui_connection_mock = Test::MockModule->new('OpenQA::Worker::WebUIConnection');
$web_ui_connection_mock->mock(_calculate_status_update_interval => .1);

my $jobs = $schema->resultset('Jobs');
my @latest = $jobs->latest_jobs;
shift(@latest)->auto_duplicate();

# try to allocate to previous worker and fail!
my ($allocated) = scheduler_step();

# simulate unresponsive worker which will register itself but not grab any jobs
my $unstable_w_pid = unresponsive_worker($k->key, $k->secret, "http://localhost:$mojoport", 3);
# FIXME: Why waiting for worker 4 here? The "unresponsive" worker has ID 5.
wait_for_worker($schema, 4);
wait_for_worker($schema, 5);

note('waiting for job to be assigned');
$allocated = scheduler_step();
is(@$allocated, 1, 'one job allocated');
is(@{$allocated}[0]->{job}, 99982, 'right job allocated');
is(@{$allocated}[0]->{worker}, 5, 'job allocated to expected worker');
for (0 .. 100) {
last if $jobs->find(99982)->state eq OpenQA::Jobs::Constants::ASSIGNED;
sleep .2;
}
is $jobs->find(99982)->state, OpenQA::Jobs::Constants::ASSIGNED, 'job is assigned';

note('waiting for assigned job to be re-scheduled');
for (0 .. 100) {
last if $schema->resultset("Jobs")->find(99982)->state eq OpenQA::Jobs::Constants::SCHEDULED;
sleep 2;
last if $jobs->find(99982)->state eq OpenQA::Jobs::Constants::SCHEDULED;
sleep .2;
}

is $schema->resultset("Jobs")->find(99982)->state, OpenQA::Jobs::Constants::SCHEDULED,
is $jobs->find(99982)->state, OpenQA::Jobs::Constants::SCHEDULED,
'assigned job set back to scheduled if worker reports back again but has abandoned the job';
kill_service($unstable_w_pid, 1);
sleep 5;

scheduler_step();
dead_workers($schema);
kill_service($unstable_w_pid, 1);

# simulate unstable worker
$unstable_w_pid = unstable_worker($k->key, $k->secret, "http://localhost:$mojoport", 3, 8, 3);
# start unstable worker again
$unstable_w_pid = unstable_worker($k->key, $k->secret, "http://localhost:$mojoport", 3, -1);
wait_for_worker($schema, 5);

($allocated) = scheduler_step();
is(@$allocated, 1, 'one job allocated');
is(@{$allocated}[0]->{job}, 99982, 'right job allocated');
is(@{$allocated}[0]->{worker}, 5, 'job allocated to expected worker');

for (0 .. 100) {
last if $jobs->find(99982)->state eq OpenQA::Jobs::Constants::ASSIGNED;
sleep .2;
}
is $jobs->find(99982)->state, OpenQA::Jobs::Constants::ASSIGNED, 'job is assigned again';

# assume the job has been actually started
$jobs->find(99982)->update({state => OpenQA::Jobs::Constants::RUNNING});

kill_service($unstable_w_pid, 1);
is $schema->resultset("Jobs")->find(99982)->state, OpenQA::Jobs::Constants::ASSIGNED;

$unstable_w_pid = unstable_worker($k->key, $k->secret, "http://localhost:$mojoport", 3, 8);
$unstable_w_pid = unstable_worker($k->key, $k->secret, "http://localhost:$mojoport", 3, -1);
wait_for_worker($schema, 5);

note('waiting for job to be incompleted');
for (0 .. 100) {
last if $schema->resultset("Jobs")->find(99982)->state eq OpenQA::Jobs::Constants::DONE;
sleep 2;
last if $jobs->find(99982)->state eq OpenQA::Jobs::Constants::DONE;
sleep .2;
}

my $job = $schema->resultset("Jobs")->find(99982);
my $job = $jobs->find(99982);
is $job->state, OpenQA::Jobs::Constants::DONE,
'running job set to done if its worker re-connects claiming not to work on it anymore';
is $job->result, OpenQA::Jobs::Constants::INCOMPLETE,
'running job incompleted if its worker re-connects claiming not to work on it anymore';

dead_workers($schema);
kill_service($unstable_w_pid, 1);
dead_workers($schema);
};

subtest 'Simulation of heavy unstable load' => sub {
Expand Down
23 changes: 11 additions & 12 deletions t/lib/OpenQA/Test/Utils.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use Mojo::IOLoop;
use Mojo::IOLoop::ReadWriteProcess 'process';
use Mojo::Server::Daemon;
use Test::MockModule;
use Time::HiRes 'sleep';

BEGIN {
if (!$ENV{MOJO_HOME}) {
Expand Down Expand Up @@ -142,13 +143,13 @@ sub kill_service {
sub wait_for_worker {
my ($schema, $id) = @_;

for (0 .. 10) {
note("Waiting for worker with ID $id");
sleep 2;
note("Waiting for worker with ID $id");
for (0 .. 40) {
sleep .5;
my $worker = $schema->resultset('Workers')->find($id);
return undef if defined $worker && !$worker->dead;
}
note("No worker with ID $id not active");
note("No worker with ID $id active");
}

sub create_webapi {
Expand Down Expand Up @@ -305,12 +306,10 @@ sub unstable_worker {
# the help of the Doctor would be really appreciated here.
my ($apikey, $apisecret, $host, $instance, $ticks, $sleep) = @_;
note("Starting unstable worker. Instance: $instance for host $host");
$ticks = 1 unless $ticks;
$ticks = 1 unless defined $ticks;

my $pid = fork();
if ($pid == 0) {
use Mojo::IOLoop;

my $worker = OpenQA::Worker->new(
{
apikey => $apikey,
Expand All @@ -322,8 +321,11 @@ sub unstable_worker {
$worker->settings->add_webui_host($host);
$worker->log_setup_info;
$worker->init();
for (0 .. $ticks) {
Mojo::IOLoop->singleton->one_tick;
if ($ticks < 0) {
Mojo::IOLoop->singleton->start;
}
else {
Mojo::IOLoop->singleton->one_tick for (0 .. $ticks);
}
Devel::Cover::report() if Devel::Cover->can('report');
if ($sleep) {
Expand Down Expand Up @@ -355,9 +357,6 @@ sub c_worker {

my $pid = fork();
if ($pid == 0) {
use Mojo::IOLoop;
use Test::MockModule;

my $command_handler_mock = Test::MockModule->new('OpenQA::Worker::CommandHandler');
if ($bogus) {
$command_handler_mock->mock(
Expand Down

0 comments on commit ca5a73b

Please sign in to comment.