From 215697368aa04b0d97ae2321ec321d9ca8e2cb91 Mon Sep 17 00:00:00 2001 From: Sven Nierlein Date: Mon, 27 Jun 2022 21:54:37 +0200 Subject: [PATCH] improve waiting for core reload --- lib/Thruk/Utils.pm | 23 ++++++++++--------- .../business_process/lib/Thruk/BP/Utils.pm | 14 +++++++++-- .../conf/lib/Thruk/Controller/conf.pm | 2 +- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/Thruk/Utils.pm b/lib/Thruk/Utils.pm index 0fe5420c9c..6ae6014707 100644 --- a/lib/Thruk/Utils.pm +++ b/lib/Thruk/Utils.pm @@ -2347,19 +2347,20 @@ sub restart_later { wait_after_reload($c, [$backend], [$timestamp]) -wait up to 60 seconds till the core responds +wait up to 30 seconds till the core responds =cut sub wait_after_reload { - my($c, $pkey, $time) = @_; - $c->stats->profile(begin => "wait_after_reload ($time)"); + my($c, $pkey, $last_reload) = @_; + $c->stats->profile(begin => "wait_after_reload"); $pkey = $c->stash->{'param_backend'} unless $pkey; my $start = time(); - if(!$pkey && !$time) { - _debug('no peer key and time, waiting 3 seconds'); + if(!$pkey) { + _debug('no peer key, waiting 3 seconds'); sleep 3; } + $last_reload = time() unless $last_reload; # wait until core responds again my $procinfo = {}; @@ -2370,7 +2371,7 @@ sub wait_after_reload { 'header' => { 'WaitTimeout' => 2000, 'WaitTrigger' => 'all', # using something else seems not to work all the time - 'WaitCondition' => "program_start > ".$time, + 'WaitCondition' => "program_start > ".$last_reload, }, }; } @@ -2394,11 +2395,11 @@ sub wait_after_reload { $msg = 'still waiting for core reload for '.(time()-$start).'s: '.$c->stash->{'failed_backends'}->{$pkey}; _debug($msg); } - elsif($pkey and $time) { + elsif($pkey and $last_reload) { # not yet restarted if($procinfo and $procinfo->{$pkey} and $procinfo->{$pkey}->{'program_start'}) { $c->stats->profile(comment => "core program_start: ".$procinfo->{$pkey}->{'program_start'}); - if($procinfo->{$pkey}->{'program_start'} > $time) { + if($procinfo->{$pkey}->{'program_start'} > $last_reload) { $done = 1; _debug('core reloaded after '.(time()-$start).'s, last program_start: '.(scalar localtime($procinfo->{$pkey}->{'program_start'}))); last; @@ -2408,14 +2409,14 @@ sub wait_after_reload { } } } - elsif($time) { + elsif($last_reload) { my $newest_core = 0; if($procinfo) { for my $key (keys %{$procinfo}) { if($procinfo->{$key}->{'program_start'} > $newest_core) { $newest_core = $procinfo->{$key}->{'program_start'}; } } $c->stats->profile(comment => "core program_start: ".$newest_core); - if($newest_core > $time) { + if($newest_core > $last_reload) { $done = 1; last; } else { @@ -2433,7 +2434,7 @@ sub wait_after_reload { sleep(1); } } - $c->stats->profile(end => "wait_after_reload ($time)"); + $c->stats->profile(end => "wait_after_reload"); if($done) { # clean up cached groups which may have changed $c->cache->clear(); diff --git a/plugins/plugins-available/business_process/lib/Thruk/BP/Utils.pm b/plugins/plugins-available/business_process/lib/Thruk/BP/Utils.pm index 652b0c8515..2786960489 100644 --- a/plugins/plugins-available/business_process/lib/Thruk/BP/Utils.pm +++ b/plugins/plugins-available/business_process/lib/Thruk/BP/Utils.pm @@ -303,7 +303,6 @@ sub save_bp_objects { } # and reload - my $time = time(); my $pkey; if($result_backend) { my $peer = $c->db->get_peer_by_key($result_backend); @@ -317,6 +316,17 @@ sub save_bp_objects { } } } + + my $last_reload = time() - 1; + if($pkey) { + $last_reload = $c->stash->{'pi_detail'}->{$pkey}->{'program_start'}; + if(!$last_reload) { + my $processinfo = $c->db->get_processinfo(backends => $pkey); + $last_reload = ($processinfo->{$pkey} && $processinfo->{$pkey}->{'program_start'}) || (time() - 1); + } + sleep(1) if $last_reload == time(); + } + my $cmd = $c->config->{'Thruk::Plugin::BP'}->{'objects_reload_cmd'}; my $reloaded = 0; if($cmd) { @@ -335,7 +345,7 @@ sub save_bp_objects { $reloaded = 1; } if($rc == 0 && $reloaded) { - my $core_reloaded = Thruk::Utils::wait_after_reload($c, $pkey, $time-1); + my $core_reloaded = Thruk::Utils::wait_after_reload($c, $pkey, $last_reload); if(!$core_reloaded) { ($rc, $msg) = (1, 'business process saved but core failed to restart'); } diff --git a/plugins/plugins-available/conf/lib/Thruk/Controller/conf.pm b/plugins/plugins-available/conf/lib/Thruk/Controller/conf.pm index af85f9a038..59f457cdeb 100644 --- a/plugins/plugins-available/conf/lib/Thruk/Controller/conf.pm +++ b/plugins/plugins-available/conf/lib/Thruk/Controller/conf.pm @@ -2737,9 +2737,9 @@ sub _config_reload { if(!$last_reload) { my $processinfo = $c->db->get_processinfo(backends => $pkey); $last_reload = ($processinfo->{$pkey} && $processinfo->{$pkey}->{'program_start'}) || (time() - 1); + sleep(1) if $last_reload == time(); } - $c->stats->profile(comment => "program_start before reload: ".$last_reload); if($c->stash->{'peer_conftool'}->{'obj_reload_cmd'}) { if($c->{'obj_db'}->is_remote() && $c->{'obj_db'}->remote_config_reload($c)) { Thruk::Utils::set_message( $c, 'success_message', 'config reloaded successfully' );