From c5f46b8b4325babe2ed18404b55c8d224452c696 Mon Sep 17 00:00:00 2001 From: Ondrej Holecek Date: Thu, 9 Apr 2015 15:23:57 +0200 Subject: [PATCH] properly exit when scheduler misbehaves - reported by maxlin, worker api-call timed out on inactivity from scheduler side. This does not solve the issue itself, but at least worker should not force it through with undefined vars - also die in timer only kills the timer, not the app itself --- lib/OpenQA/Worker/Common.pm | 9 +++++++-- lib/OpenQA/Worker/Jobs.pm | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/OpenQA/Worker/Common.pm b/lib/OpenQA/Worker/Common.pm index 41a35b4482a..ea4676a2730 100644 --- a/lib/OpenQA/Worker/Common.pm +++ b/lib/OpenQA/Worker/Common.pm @@ -172,7 +172,7 @@ sub api_call { my ($method, $path, $params, $json_data, $ignore_errors) = @_; state $call_running; - return undef unless verify_workerid(); + return unless verify_workerid(); if ($call_running) { # quit immediately @@ -293,6 +293,9 @@ sub _get_capabilities { } sub setup_websocket { + # no point in trying if we are not registered + return unless verify_workerid(); + # if there is an existing web socket connection wait until it finishes. if ($ws) { add_timer('setup_websocket', 2, \&setup_websocket, 1); @@ -367,7 +370,9 @@ sub register_worker { my $tx = $ua->post($ua_url => json => $worker_caps); unless ($tx->success && $tx->success->json) { if ($tx->error && $tx->error->{code} && $tx->error->{code} =~ /^4\d\d$/) { - die sprintf "server refused with code %s: %s\n", $tx->error->{code}, $tx->res->body; + # don't retry when 4xx codes are returned. 
There is problem with scheduler + printf "server refused with code %s: %s\n", $tx->error->{code}, $tx->res->body; + Mojo::IOLoop->stop; } print "failed to register worker, retry ...\n" if $verbose; add_timer('register_worker', 10, \&register_worker, 1); diff --git a/lib/OpenQA/Worker/Jobs.pm b/lib/OpenQA/Worker/Jobs.pm index f2f37070071..ee4d24de40c 100644 --- a/lib/OpenQA/Worker/Jobs.pm +++ b/lib/OpenQA/Worker/Jobs.pm @@ -187,7 +187,7 @@ sub _stop_job($;$) { $job_done = 1; } } - unless ($job_done) { + unless ($job_done || $aborted eq 'api-failure') { # set job to done. if priority is less than threshold duplicate it # with worse priority so it can be picked up again. my %args; @@ -208,7 +208,7 @@ sub _stop_job($;$) { return; } # immediatelly check for already scheduled job - add_timer('check_job', 0, \&check_job, 1) unless ($job); + add_timer('check_job', 0, \&check_job, 1); } sub start_job {