Skip to content

Commit

Permalink
Merge pull request #17662 from hadeskun/waitHanaDatabaseOffline
Browse files Browse the repository at this point in the history
Wait hana database offline
  • Loading branch information
lpalovsky committed Nov 2, 2023
2 parents f3a1844 + 61540e1 commit 37409ad
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 5 deletions.
102 changes: 99 additions & 3 deletions lib/sles4sap_publiccloud.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use hacluster;
use qesapdeployment;
use YAML::PP;
use publiccloud::instance;
use sles4sap;

our @EXPORT = qw(
run_cmd
Expand Down Expand Up @@ -57,6 +58,9 @@ our @EXPORT = qw(
display_full_status
list_cluster_nodes
sles4sap_cleanup
is_hana_database_online
get_hana_database_status
is_primary_node_online
);

=head2 run_cmd
Expand Down Expand Up @@ -349,8 +353,8 @@ sub check_takeover {
my ($self) = @_;
my $hostname = $self->{my_instance}->{instance_id};
my $retry_count = 0;
my $fenced_hana_status = $self->is_hana_online();
die("Fenced database '$hostname' is not offline") if ($fenced_hana_status == 1);
die("Database on the fenced node '$hostname' is not offline") if ($self->is_hana_database_online);
die("System replication '$hostname' is not offline") if ($self->is_primary_node_online);
TAKEOVER_LOOP: while (1) {
my $topology = $self->get_hana_topology();
Expand Down Expand Up @@ -381,7 +385,8 @@ sub check_takeover {
sub enable_replication {
my ($self) = @_;
my $hostname = $self->{my_instance}->{instance_id};
die("Fenced database '$hostname' is not offline") if ($self->is_hana_online());
die("Database on the fenced node '$hostname' is not offline") if ($self->is_hana_database_online);
die("System replication '$hostname' is not offline") if ($self->is_primary_node_online);
my $topology_out = $self->get_hana_topology(hostname => $hostname);
my %topology = %$topology_out;
Expand Down Expand Up @@ -831,4 +836,95 @@ sub list_cluster_nodes {
die 'None of the hosts are currently part of existing cluster' unless @cluster_nodes;
}
=head2 get_hana_database_status

    $self->get_hana_database_status($password_db, $instance_id);

Query the HANA database with C<hdbsql>, executed as the user named by the
B<SAP_SIDADM> setting, and inspect the command output.

Returns 0 when the output contains "Connection failed" (the database is
considered offline), 1 otherwise.

=over 2

=item B<$password_db> - password handed to C<hdbsql -p> for the SYSTEM user

=item B<$instance_id> - value handed to C<hdbsql -i>

=back

=cut

sub get_hana_database_status {
    my ($self, $password_db, $instance_id) = @_;
    my $query_cmd = "hdbsql -u SYSTEM -p $password_db -i $instance_id 'SELECT * FROM SYS.M_DATABASES;'";
    my $query_out = $self->run_cmd(
        cmd => $query_cmd,
        runas => get_required_var("SAP_SIDADM"),
        proceed_on_failure => 1
    );

    # Anything other than a refused connection is treated as "online".
    return 1 unless $query_out =~ /Connection failed/;

    record_info('HANA DB OFFLINE', "Hana database in primary node is offline. Here the output \n$query_out");
    return 0;
}
=head2 is_hana_database_online

    $self->is_hana_database_online([timeout => 900, total_consecutive_passes => 5]);

Poll the HANA database status with C<get_hana_database_status> until the
database is reported offline or the allowed number of polls is used up.
The polls are 30 seconds apart.

Returns 0 as soon as one poll reports the database offline. Returns 1 if the
database was still online at the last poll. If B<total_consecutive_passes> is
0 or negative no poll is performed and the initial value -1 is returned.

Dies with "Hana database is still online" if the database is still online
after more than B<timeout> seconds since the first poll.

Requires the settings B<INSTANCE_ID> and B<_HANA_MASTER_PW>.

=over 2

=item B<timeout> - overall time limit in seconds, scaled with C<bmwqemu::scale_timeout>. Default 900

=item B<total_consecutive_passes> - maximum number of polls. Default 5

=back

=cut

sub is_hana_database_online {
    my ($self, %args) = @_;
    my $timeout = bmwqemu::scale_timeout($args{timeout} // 900);
    my $total_consecutive_passes = ($args{total_consecutive_passes} // 5);
    my $instance_id = get_required_var('INSTANCE_ID');
    my $db_status = -1;
    my $consecutive_passes = 0;
    my $password_db = get_required_var('_HANA_MASTER_PW');
    my $start_time = time;
    my $hdb_cmd = "hdbsql -u SYSTEM -p $password_db -i $instance_id 'SELECT * FROM SYS.M_DATABASES;'";

    while ($consecutive_passes < $total_consecutive_passes) {
        $db_status = $self->get_hana_database_status($password_db, $instance_id);

        # Evaluate the fresh status before the deadline: a database that is
        # already offline must never be reported as "still online".
        last if ($db_status == 0);

        if (time - $start_time > $timeout) {
            # Diagnostic output only. proceed_on_failure is passed, as in
            # get_hana_database_status, so that a failure of this query is not
            # raised in place of the error below.
            record_info("Hana database after timeout", $self->run_cmd(cmd => $hdb_cmd, proceed_on_failure => 1));
            die("Hana database is still online");
        }

        ++$consecutive_passes;
        # No point in waiting once the last allowed poll has been done.
        sleep 30 if ($consecutive_passes < $total_consecutive_passes);
    }
    return $db_status;
}
=head2 is_primary_node_online

    $self->is_primary_node_online([timeout => 300]);

Repeatedly run C<systemReplicationStatus.py> as the C<E<lt>sidE<gt>adm> user
(derived from the B<INSTANCE_SID> setting) and check whether its output still
reports C<mode: PRIMARY>.

Returns 0 as soon as the output no longer reports the PRIMARY mode.
Returns 1 if PRIMARY is still reported after 30 polls, 10 seconds apart.

=over 2

=item B<timeout> - timeout in seconds for each single command execution, scaled with C<bmwqemu::scale_timeout>. Default 300

=back

=cut

sub is_primary_node_online {
    my ($self, %args) = @_;
    my $sapadmin = lc(get_required_var('INSTANCE_SID')) . 'adm';
    # Fixed polling budget: 300 seconds of waiting, spent in 10 second steps.
    # It is independent from the 'timeout' argument, which only limits each
    # single command execution.
    my $time_to_wait = 300;
    my $timeout = bmwqemu::scale_timeout($args{timeout} // 300);
    my $cmd = "python exe/python_support/systemReplicationStatus.py";
    my $output = "";

    # Loop until the node is not reported as primary any more or the budget is used up
    while ($time_to_wait > 0) {
        $output = $self->run_cmd(cmd => $cmd, runas => $sapadmin, timeout => $timeout, proceed_on_failure => 1);
        if ($output !~ /mode:[\r\n\s]+PRIMARY/) {
            # Log the command output. $@ is unrelated to this call, as no eval is involved.
            record_info('SYSTEM REPLICATION STATUS', "System replication status in primary node.\n$output");
            return 0;
        }
        $time_to_wait -= 10;
        # Skip the pause after the last poll, nothing is checked afterwards.
        sleep 10 if ($time_to_wait > 0);
    }
    record_info('SYSTEM REPLICATION STATUS', "System replication status in primary node.\n$output");
    return 1;
}
1;
64 changes: 62 additions & 2 deletions t/12_sles4sap_publicccloud.t
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use strict;
use warnings;
use testapi;
use Test::MockModule;
use Test::Exception;
use Test::More;
use sles4sap_publiccloud;
use testapi;

use sles4sap_publiccloud;

subtest "Run 'setup_sbd_delay_publiccloud' with different values" => sub {
my $self = sles4sap_publiccloud->new();
Expand Down Expand Up @@ -112,4 +112,64 @@ subtest '[list_cluster_nodes]' => sub {
dies_ok { $self->list_cluster_nodes() } 'Expected failure: missing mandatory arg';
};

# is_hana_database_online has to return 0 as soon as the status helper
# reports the database as offline.
subtest '[is_hana_database_online] database is offline' => sub {
    my $self = sles4sap_publiccloud->new();
    my $sles4sap_publiccloud = Test::MockModule->new('sles4sap_publiccloud', no_auto => 1);
    # Status helper always answers "offline", no command is executed
    $sles4sap_publiccloud->redefine(get_hana_database_status => sub { return 0; });
    $sles4sap_publiccloud->redefine(record_info => sub { note(join(' ', 'RECORD_INFO -->', @_)); });
    set_var('SAP_SIDADM', 'SAP_SIDADMTEST');
    set_var('INSTANCE_ID', 'INSTANCE_IDTEST');
    set_var('_HANA_MASTER_PW', '1234');

    my $res = $self->is_hana_database_online();
    # Reset the settings before asserting, so they never leak into other subtests
    set_var('SAP_SIDADM', undef);
    set_var('INSTANCE_ID', undef);
    set_var('_HANA_MASTER_PW', undef);
    is $res, 0, "Hana database is offline";
};

# is_hana_database_online has to return 1 when the status helper keeps
# reporting the database as online.
subtest '[is_hana_database_online] database is online' => sub {
    my $self = sles4sap_publiccloud->new();
    my $sles4sap_publiccloud = Test::MockModule->new('sles4sap_publiccloud', no_auto => 1);
    # Status helper always answers "online", no command is executed
    $sles4sap_publiccloud->redefine(get_hana_database_status => sub { return 1; });
    $sles4sap_publiccloud->redefine(record_info => sub { note(join(' ', 'RECORD_INFO -->', @_)); });
    set_var('SAP_SIDADM', 'SAP_SIDADMTEST');
    set_var('INSTANCE_ID', 'INSTANCE_IDTEST');
    set_var('_HANA_MASTER_PW', '1234');

    # A single poll is enough to observe the "online" answer. The default of
    # five polls only adds 30 second pauses between identical mocked answers.
    my $res = $self->is_hana_database_online(total_consecutive_passes => 1);
    # Reset the settings before asserting, so they never leak into other subtests
    set_var('SAP_SIDADM', undef);
    set_var('INSTANCE_ID', undef);
    set_var('_HANA_MASTER_PW', undef);
    is $res, 1, "Hana database is online";
};

# is_primary_node_online has to return 0 when the replication status output
# does not report the PRIMARY mode.
subtest '[is_primary_node_online] node is not primary' => sub {
    my $self = sles4sap_publiccloud->new();
    my $sles4sap_publiccloud = Test::MockModule->new('sles4sap_publiccloud', no_auto => 1);
    # The function under test calls run_cmd with proceed_on_failure, so a
    # failing status script is modelled by its output and not by an exception.
    $sles4sap_publiccloud->redefine(run_cmd => sub { return 'this system is not a system replication site'; });
    $sles4sap_publiccloud->redefine(record_info => sub { return; });

    set_var('INSTANCE_SID', 'INSTANCE_SIDTEST');
    my $res = $self->is_primary_node_online();
    set_var('INSTANCE_SID', undef);
    # Assert on the return value: it is what the callers base their decision on
    is $res, 0, 'System replication is offline on primary node';
};

# is_primary_node_online has to return 1 when every poll of the replication
# status still reports the PRIMARY mode.
subtest '[is_primary_node_online]' => sub {
    my $instance = sles4sap_publiccloud->new();
    my $mock = Test::MockModule->new('sles4sap_publiccloud', no_auto => 1);
    # Silence the logging and let the status command always answer PRIMARY
    $mock->redefine(record_info => sub { return; });
    $mock->redefine(run_cmd => sub { return 'mode: PRIMARY'; });

    set_var('INSTANCE_SID', 'INSTANCE_SIDTEST');
    my $is_online = $instance->is_primary_node_online();
    set_var('INSTANCE_SID', undef);

    is $is_online, 1, "System replication is online on primary node";
};

done_testing;

0 comments on commit 37409ad

Please sign in to comment.