Skip to content

Commit

Permalink
improve waiting for core reload
Browse files (browse the repository at this point in the history)
  • Loading branch information
sni committed Jun 27, 2022
1 parent fd1836f commit 2156973
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 14 deletions.
23 changes: 12 additions & 11 deletions lib/Thruk/Utils.pm
Expand Up @@ -2347,19 +2347,20 @@ sub restart_later {
wait_after_reload($c, [$backend], [$timestamp])
wait up to 60 seconds till the core responds
wait up to 30 seconds till the core responds
=cut

sub wait_after_reload {
my($c, $pkey, $time) = @_;
$c->stats->profile(begin => "wait_after_reload ($time)");
my($c, $pkey, $last_reload) = @_;
$c->stats->profile(begin => "wait_after_reload");
$pkey = $c->stash->{'param_backend'} unless $pkey;
my $start = time();
if(!$pkey && !$time) {
_debug('no peer key and time, waiting 3 seconds');
if(!$pkey) {
_debug('no peer key, waiting 3 seconds');
sleep 3;
}
$last_reload = time() unless $last_reload;

# wait until core responds again
my $procinfo = {};
Expand All @@ -2370,7 +2371,7 @@ sub wait_after_reload {
'header' => {
'WaitTimeout' => 2000,
'WaitTrigger' => 'all', # using something else seems not to work all the time
'WaitCondition' => "program_start > ".$time,
'WaitCondition' => "program_start > ".$last_reload,
},
};
}
Expand All @@ -2394,11 +2395,11 @@ sub wait_after_reload {
$msg = 'still waiting for core reload for '.(time()-$start).'s: '.$c->stash->{'failed_backends'}->{$pkey};
_debug($msg);
}
elsif($pkey and $time) {
elsif($pkey and $last_reload) {
# not yet restarted
if($procinfo and $procinfo->{$pkey} and $procinfo->{$pkey}->{'program_start'}) {
$c->stats->profile(comment => "core program_start: ".$procinfo->{$pkey}->{'program_start'});
if($procinfo->{$pkey}->{'program_start'} > $time) {
if($procinfo->{$pkey}->{'program_start'} > $last_reload) {
$done = 1;
_debug('core reloaded after '.(time()-$start).'s, last program_start: '.(scalar localtime($procinfo->{$pkey}->{'program_start'})));
last;
Expand All @@ -2408,14 +2409,14 @@ sub wait_after_reload {
}
}
}
elsif($time) {
elsif($last_reload) {
my $newest_core = 0;
if($procinfo) {
for my $key (keys %{$procinfo}) {
if($procinfo->{$key}->{'program_start'} > $newest_core) { $newest_core = $procinfo->{$key}->{'program_start'}; }
}
$c->stats->profile(comment => "core program_start: ".$newest_core);
if($newest_core > $time) {
if($newest_core > $last_reload) {
$done = 1;
last;
} else {
Expand All @@ -2433,7 +2434,7 @@ sub wait_after_reload {
sleep(1);
}
}
$c->stats->profile(end => "wait_after_reload ($time)");
$c->stats->profile(end => "wait_after_reload");
if($done) {
# clean up cached groups which may have changed
$c->cache->clear();
Expand Down
14 changes: 12 additions & 2 deletions plugins/plugins-available/business_process/lib/Thruk/BP/Utils.pm
Expand Up @@ -303,7 +303,6 @@ sub save_bp_objects {
}

# and reload
my $time = time();
my $pkey;
if($result_backend) {
my $peer = $c->db->get_peer_by_key($result_backend);
Expand All @@ -317,6 +316,17 @@ sub save_bp_objects {
}
}
}

my $last_reload = time() - 1;
if($pkey) {
$last_reload = $c->stash->{'pi_detail'}->{$pkey}->{'program_start'};
if(!$last_reload) {
my $processinfo = $c->db->get_processinfo(backends => $pkey);
$last_reload = ($processinfo->{$pkey} && $processinfo->{$pkey}->{'program_start'}) || (time() - 1);
}
sleep(1) if $last_reload == time();
}

my $cmd = $c->config->{'Thruk::Plugin::BP'}->{'objects_reload_cmd'};
my $reloaded = 0;
if($cmd) {
Expand All @@ -335,7 +345,7 @@ sub save_bp_objects {
$reloaded = 1;
}
if($rc == 0 && $reloaded) {
my $core_reloaded = Thruk::Utils::wait_after_reload($c, $pkey, $time-1);
my $core_reloaded = Thruk::Utils::wait_after_reload($c, $pkey, $last_reload);
if(!$core_reloaded) {
($rc, $msg) = (1, 'business process saved but core failed to restart');
}
Expand Down
Expand Up @@ -2737,9 +2737,9 @@ sub _config_reload {
if(!$last_reload) {
my $processinfo = $c->db->get_processinfo(backends => $pkey);
$last_reload = ($processinfo->{$pkey} && $processinfo->{$pkey}->{'program_start'}) || (time() - 1);
sleep(1) if $last_reload == time();
}

$c->stats->profile(comment => "program_start before reload: ".$last_reload);
if($c->stash->{'peer_conftool'}->{'obj_reload_cmd'}) {
if($c->{'obj_db'}->is_remote() && $c->{'obj_db'}->remote_config_reload($c)) {
Thruk::Utils::set_message( $c, 'success_message', 'config reloaded successfully' );
Expand Down

0 comments on commit 2156973

Please sign in to comment.