diff --git a/contrib/scaling/scaling.pl b/contrib/scaling/scaling.pl index 6b501953823..1c4f5099de0 100755 --- a/contrib/scaling/scaling.pl +++ b/contrib/scaling/scaling.pl @@ -3,6 +3,10 @@ # Copyright (c) 2012 Los Alamos National Security, Inc. # All rights reserved. # Copyright (c) 2015-2016 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2018 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. + use strict; use Getopt::Long; @@ -21,6 +25,7 @@ my $rawoutput = 0; my $myresults = "myresults"; my $ppn = 1; +my $npmin = 1; my @csvrow; my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op); @@ -28,8 +33,8 @@ my @starterlist = qw(mpirun prun srun aprun); my @starteroptionlist = ("--novm", "", - "--distribution=cyclic -N", - "-N"); + "--distribution=cyclic", + ""); # Set to true if the script should merely print the cmds # it would run, but don't run them @@ -52,6 +57,7 @@ "results=s" => \$myresults, "rawout" => \$rawoutput, "ppn=s" => \$ppn, + "npmin=s" => \$npmin, ) or die "unable to parse options, stopped"; if ($HELP) { @@ -69,6 +75,7 @@ --results=file File where results are to be stored in comma-separated value format --rawout Provide raw timing output to the file --ppn=n Run n procs/node +--npmin=n Minimal number of nodes "; exit(0); } @@ -126,11 +133,11 @@ push @starteroptions, $opt; } elsif ($useaprun && $starter eq "aprun") { push @starters, $starter; - $opt = $starteroptionlist[$idx] . " " . $ppn; + $opt = $starteroptionlist[$idx] . " -N " . $ppn; push @starteroptions, $opt; } elsif ($usesrun && $starter eq "srun") { push @starters, $starter; - $opt = $starteroptionlist[$idx] . " " . $ppn; + $opt = $starteroptionlist[$idx] . " --ntasks-per-node " . $ppn; push @starteroptions, $opt; } } @@ -184,10 +191,21 @@ sub runcmd() { + my $rc; for (1..$reps) { $output = `$cmd`; + # Check the error code of the command; if the error code is alright + # just add a 0 in front of the number to neutraly mark the success; + # If the code is not correct, add a ! in front of the number to mark + # it invalid. + if($? != 0) { + $rc = "0"; + } + else { + $rc = "!"; + } if ($myresults && $rawoutput) { - print FILE $n . " " . $output . "\n"; + print FILE $n . " " . $output . " $rc\n"; } @lines = split(/\n/, $output); foreach $line (@lines) { @@ -209,14 +227,14 @@ () if (0 == $strloc) { if (0 == $idx) { # it must be in the next location - push @csvrow,$results[1]; + push @csvrow,join $rc,$results[1]; } else { # it must be in the prior location - push @csvrow,$results[$idx-1]; + push @csvrow,join $rc,$results[$idx-1]; } } else { # take the portion of the string up to the tag - push @csvrow,substr($res, 0, $strloc); + push @csvrow,join $rc,substr($res, 0, $strloc); } } else { $strloc = index($res, "elapsed"); @@ -227,14 +245,14 @@ () if (0 == $strloc) { if (0 == $idx) { # it must be in the next location - push @csvrow,$results[1]; + push @csvrow,join $rc,$results[1]; } else { # it must be in the prior location - push @csvrow,$results[$idx-1]; + push @csvrow,join $rc,$results[$idx-1]; } } else { # take the portion of the string up to the tag - push @csvrow,substr($res, 0, $strloc); + push @csvrow,join $rc,substr($res, 0, $strloc); } } } @@ -263,20 +281,28 @@ () } foreach $starter (@starters) { + my $dvmout; print "STARTER: $starter\n"; # if we are going to use the dvm, then we if ($starter eq "prun") { # need to start it - $cmd = "orte-dvm -mca pmix_system_server 1 2>&1 &"; + $cmd = "orte-dvm -mca pmix_system_server 1"; + print "##DVM: Launching $cmd\n"; if ($myresults) { print FILE "\n\n$cmd\n"; } if (!$SHOWME) { - system($cmd); - $havedvm = 1; + $havedvm = open($dvmout, $cmd."|") or die "##DVM: Spawn error $!\n"; + print "##DVM: pid=$havedvm\n"; + # Wait that the dvm reports that it is ready + my $waitready = <$dvmout>; + if($waitready =~ /DVM ready/i) { + print "##DVM: $waitready\n"; + } + else { + die "##DVM: error: $waitready\n"; + } } - # give it a couple of seconds to start - sleep 2; } if ($myresults) { @@ -285,6 +311,13 @@ () my $testnum = 0; foreach $test (@tests) { $option = $options[$testnum]; + if ($starter eq "aprun") { + $option =~ s/-mca\s+(\S+)\s+(\S+)/-e OMPI_MCA_$1=$2/g; + } + if ($starter eq "srun") { + $option =~ s/-mca\s+(\S+)\s+(\S+)\s*/OMPI_MCA_$1=$2,/g; + $option =~ s/\s*(OMPI_MCA\S+)/ --export=$1ALL/g; + } if (-e $test) { if ($myresults) { print FILE "#nodes,$test,$option\n"; @@ -294,11 +327,12 @@ () $cmd = $starter . $starteroptions[$index] . " $test 2>&1"; system($cmd); } - $n = 1; + $n = $npmin; while ($n <= $num_nodes) { push @csvrow,$n; - if ($starter eq "prun" or $starter eq "mpirun") { - $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $n $test 2>&1"; + if ($starter eq "prun" or $starter eq "mpirun" or $starter eq "aprun") { + my $np = $n * $ppn; + $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $np $test 2>&1"; } else { $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -N $n $test 2>&1"; } @@ -326,6 +360,7 @@ () if (!$SHOWME) { $cmd = "prun --terminate"; system($cmd); + waitpid($havedvm, 0); } } $index = $index + 1; diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index 98ef551d3ca..028932cda22 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -371,7 +371,7 @@ static void vm_ready(int fd, short args, void *cbdata) } OBJ_RELEASE(buf); /* notify that the vm is ready */ - fprintf(stdout, "DVM ready\n"); + fprintf(stdout, "DVM ready\n"); fflush(stdout); OBJ_RELEASE(caddy); return; }