From c18cca9502358227e9d26370bd632f5f3532266d Mon Sep 17 00:00:00 2001 From: Sebastian Chlad Date: Wed, 4 Dec 2019 13:49:10 +0100 Subject: [PATCH] HPC: use script_run for munge keys and slurm conf Both munge and slurm configs are copied with scp using a custom util method: exec_and_insert_password. This patch simplifies the approach and makes it more reliable by ensuring the ssh keys are distributed to all nodes, so that scp can be used directly. --- lib/hpc/configs.pm | 8 ++++---- lib/hpcbase.pm | 4 ++-- tests/hpc/slurm_master.pm | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/hpc/configs.pm b/lib/hpc/configs.pm index c446b14a60bf..afe3940e1099 100644 --- a/lib/hpc/configs.pm +++ b/lib/hpc/configs.pm @@ -23,14 +23,14 @@ sub prepare_slurm_conf { if ($slurm_conf eq "basic") { my $config = << "EOF"; -sed -i "/^ControlMachine.*/c\\ControlMachine=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf +sed -i "/^SlurmctldHost.*/c\\SlurmctldHost=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf sed -i "/^NodeName.*/c\\NodeName=$cluster_ctl_nodes,$cluster_compute_nodes Sockets=1 CoresPerSocket=1 ThreadsPerCore=1 State=unknown" /etc/slurm/slurm.conf sed -i "/^PartitionName.*/c\\PartitionName=normal Nodes=$cluster_ctl_nodes,$cluster_compute_nodes Default=YES MaxTime=24:00:00 State=UP" /etc/slurm/slurm.conf EOF assert_script_run($_) foreach (split /\n/, $config); } elsif ($slurm_conf eq "accounting") { my $config = << "EOF"; -sed -i "/^ControlMachine.*/c\\ControlMachine=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf +sed -i "/^SlurmctldHost.*/c\\SlurmctldHost=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf sed -i "/^NodeName.*/c\\NodeName=$cluster_ctl_nodes,$cluster_compute_nodes Sockets=1 CoresPerSocket=1 ThreadsPerCore=1 State=unknown" /etc/slurm/slurm.conf sed -i "/^PartitionName.*/c\\PartitionName=normal Nodes=$cluster_ctl_nodes,$cluster_compute_nodes Default=YES MaxTime=24:00:00 State=UP" /etc/slurm/slurm.conf sed -i 
"/^#JobAcctGatherType.*/c\\JobAcctGatherType=jobacct_gather/linux" /etc/slurm/slurm.conf @@ -42,7 +42,7 @@ EOF assert_script_run($_) foreach (split /\n/, $config); } elsif ($slurm_conf eq "ha") { my $config = << "EOF"; -sed -i "/^ControlMachine.*/c\\ControlMachine=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf +sed -i "/^SlurmctldHost.*/c\\SlurmctldHost=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf sed -i "/^#BackupController.*/c\\BackupController=$cluster_ctl_nodes[1]" /etc/slurm/slurm.conf sed -i "/^StateSaveLocation.*/c\\StateSaveLocation=/shared/slurm/" /etc/slurm/slurm.conf sed -i "/^NodeName.*/c\\NodeName=$cluster_ctl_nodes,$cluster_compute_nodes Sockets=1 CoresPerSocket=1 ThreadsPerCore=1 State=unknown" /etc/slurm/slurm.conf @@ -53,7 +53,7 @@ EOF assert_script_run($_) foreach (split /\n/, $config); } elsif ($slurm_conf eq "nfs_db") { my $config = << "EOF"; -sed -i "/^ControlMachine.*/c\\ControlMachine=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf +sed -i "/^SlurmctldHost.*/c\\SlurmctldHost=$cluster_ctl_nodes[0]" /etc/slurm/slurm.conf sed -i "/^#BackupController.*/c\\BackupController=$cluster_ctl_nodes[1]" /etc/slurm/slurm.conf sed -i "/^StateSaveLocation.*/c\\StateSaveLocation=/shared/slurm/" /etc/slurm/slurm.conf sed -i "/^NodeName.*/c\\NodeName=$cluster_ctl_nodes,$cluster_compute_nodes Sockets=1 CoresPerSocket=1 ThreadsPerCore=1 State=unknown" /etc/slurm/slurm.conf diff --git a/lib/hpcbase.pm b/lib/hpcbase.pm index 6bb3413c2774..cd6e8ef7ae1c 100644 --- a/lib/hpcbase.pm +++ b/lib/hpcbase.pm @@ -85,7 +85,7 @@ sub distribute_munge_key { my ($self) = @_; my @cluster_nodes = cluster_names(); foreach (@cluster_nodes) { - exec_and_insert_password("scp -o StrictHostKeyChecking=no /etc/munge/munge.key root\@$_:/etc/munge/munge.key"); + script_run("scp -o StrictHostKeyChecking=no /etc/munge/munge.key root\@$_:/etc/munge/munge.key"); } } @@ -93,7 +93,7 @@ sub distribute_slurm_conf { my ($self) = @_; my @cluster_nodes = cluster_names(); foreach (@cluster_nodes) { - 
exec_and_insert_password("scp -o StrictHostKeyChecking=no /etc/slurm/slurm.conf root\@$_:/etc/slurm/slurm.conf"); + script_run("scp -o StrictHostKeyChecking=no /etc/slurm/slurm.conf root\@$_:/etc/slurm/slurm.conf"); } } diff --git a/tests/hpc/slurm_master.pm b/tests/hpc/slurm_master.pm index ade1c3593ff5..6da447178aec 100644 --- a/tests/hpc/slurm_master.pm +++ b/tests/hpc/slurm_master.pm @@ -219,6 +219,7 @@ sub run { my $nodes = get_required_var('CLUSTER_NODES'); my $slurm_conf = get_required_var('SLURM_CONF'); $self->prepare_user_and_group(); + $self->generate_and_distribute_ssh(); # provision HPC cluster, so the proper rpms are installed, # munge key is distributed to all nodes, so is slurm.conf