diff --git a/config/pmix.m4 b/config/pmix.m4 index e7166dffeb..98dad7dcc1 100644 --- a/config/pmix.m4 +++ b/config/pmix.m4 @@ -1022,6 +1022,7 @@ AC_DEFUN([PMIX_SETUP_CORE],[ pmix_config_prefix[src/tools/wrapper/Makefile] pmix_config_prefix[src/tools/wrapper/pmixcc-wrapper-data.txt] pmix_config_prefix[src/tools/palloc/Makefile] + pmix_config_prefix[src/tools/pctrl/Makefile] ) # Success diff --git a/examples/pset.c b/examples/pset.c index 5b83c6d919..88ae24361a 100644 --- a/examples/pset.c +++ b/examples/pset.c @@ -16,7 +16,7 @@ * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2019 IBM Corporation. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -129,6 +129,7 @@ int main(int argc, char **argv) mylock_t mylock; myrel_t myrel; pmix_info_t info; + pmix_value_t *val; EXAMPLES_HIDE_UNUSED_PARAMS(argc, argv); @@ -180,6 +181,14 @@ int main(int argc, char **argv) } DEBUG_DESTRUCT_MYREL(&myrel); + // check if I can retrieve my new pset membership + if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_PSET_NAMES, NULL, 0, &val))) { + fprintf(stderr, "[%s:%d] PMIx_Get PMIX_PSET_NAMES returned %s\n", myproc.nspace, + myproc.rank, PMIx_Error_string(rc)); + goto done; + } + fprintf(stderr, "[%s:%d] belongs to psets %s\n", myproc.nspace, myproc.rank, val->data.string); + PMIX_VALUE_RELEASE(val); done: /* finalize us */ diff --git a/include/pmix.h b/include/pmix.h index 7e4ce16097..a8f0b5b326 100644 --- a/include/pmix.h +++ b/include/pmix.h @@ -1720,7 +1720,13 @@ PMIX_EXPORT pmix_value_cmp_t PMIx_Value_compare(pmix_value_t *v1, +PMIX_EXPORT void PMIx_Data_array_init(pmix_data_array_t *p, + pmix_data_type_t type); +PMIX_EXPORT void PMIx_Data_array_construct(pmix_data_array_t *p, + size_t num, pmix_data_type_t type); PMIX_EXPORT void 
PMIx_Data_array_destruct(pmix_data_array_t *d); +PMIX_EXPORT pmix_data_array_t* PMIx_Data_array_create(size_t n, pmix_data_type_t type); +PMIX_EXPORT void PMIx_Data_array_free(pmix_data_array_t *p); /* initialize an info struct */ diff --git a/include/pmix_common.h.in b/include/pmix_common.h.in index 442136678a..04f0f15b18 100644 --- a/include/pmix_common.h.in +++ b/include/pmix_common.h.in @@ -897,6 +897,7 @@ typedef uint32_t pmix_rank_t; #define PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned #define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted #define PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs +#define PMIX_JOB_CTRL_DEFINE_PSET "pmix.jctrl.defpset" // (char*) Pset name to be assigned to the targets #define PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files to // be removed upon process termination #define PMIX_REGISTER_CLEANUP_DIR "pmix.reg.cleanupdir" // (char*) comma-delimited list of directories to diff --git a/src/server/pmix_server_get.c b/src/server/pmix_server_get.c index ee46f937a9..e726546eb0 100644 --- a/src/server/pmix_server_get.c +++ b/src/server/pmix_server_get.c @@ -8,7 +8,7 @@ * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -160,6 +160,8 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_modex_cbfunc_t cbfunc, vo struct timeval tv = {0, 0}; pmix_buffer_t pbkt; pmix_cb_t cb; + pmix_kval_t *kval; + pmix_byte_object_t bo; pmix_proc_t proc; char *data; size_t sz, n; @@ -189,6 +191,8 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_modex_cbfunc_t cbfunc, vo PMIX_ERROR_LOG(rc); return rc; } + PMIX_LOAD_PROCID(&proc, nspace, rank); + /* retrieve any provided info structs */ cnt = 1; PMIX_BFROPS_UNPACK(rc, cd->peer, buf, &cd->ninfo, &cnt, PMIX_SIZE); @@ -236,6 +240,86 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_modex_cbfunc_t cbfunc, vo } } + /* check for a request for pset names - these are not associated + * with a given nspace. Instead, we are searching for any psets + * that contain the calling process */ + if (keyprovided && PMIx_Check_key(key, PMIX_PSET_NAMES)) { + /* loop over all known psets and collect names + * in which this proc is a member */ + pmix_pset_t *pset; + char **psets = NULL; + PMIX_LIST_FOREACH(pset, &pmix_server_globals.psets, pmix_pset_t) { + for (n=0; n < pset->nmembers; n++) { + if (PMIx_Check_procid(&pset->members[n], &proc)) { + PMIx_Argv_append_nosize(&psets, pset->name); + break; + } + } + } + if (NULL != psets) { + data = PMIx_Argv_join(psets, ','); + PMIx_Argv_free(psets); + // pass it back + // we have to assemble this data into a form that + // the client_get function can properly unpack + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_CONSTRUCT(&cb, pmix_cb_t); + PMIX_KVAL_NEW(kval, PMIX_PSET_NAMES); + kval->value->data.string = data; + kval->value->type = PMIX_STRING; + pmix_list_append(&cb.kvs, &kval->super); + /* assemble the provided data into a byte object */ + PMIX_GDS_ASSEMB_KVS_REQ(rc, pmix_globals.mypeer, &proc, &cb.kvs, &pbkt, cd); + if (rc != PMIX_SUCCESS) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&cb); + return rc; + } + PMIX_DESTRUCT(&cb); + 
if (PMIX_PEER_IS_V1(cd->peer)) { + /* if the client is using v1, then it expects the + * data returned to it as the rank followed by a byte object containing + * a buffer - so we have to do a little gyration */ + pmix_buffer_t xfer; + PMIX_CONSTRUCT(&xfer, pmix_buffer_t); + PMIX_BFROPS_PACK(rc, cd->peer, &xfer, &pbkt, 1, PMIX_BUFFER); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&pbkt); + PMIX_DESTRUCT(&xfer); + PMIX_DESTRUCT(&cb); + return rc; + } + PMIX_UNLOAD_BUFFER(&xfer, bo.bytes, bo.size); + PMIX_DESTRUCT(&xfer); + } else { + PMIX_UNLOAD_BUFFER(&pbkt, bo.bytes, bo.size); + } + PMIX_DESTRUCT(&pbkt); + /* pack it for transmission */ + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + PMIX_BFROPS_PACK(rc, cd->peer, &pbkt, &bo, 1, PMIX_BYTE_OBJECT); + if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + PMIX_DESTRUCT(&pbkt); + return rc; + } + /* unload the resulting payload */ + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); + PMIX_DESTRUCT(&pbkt); + /* call the internal callback function - it will + * release the cbdata */ + cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data); + /* return success so the server doesn't duplicate + * the release of cbdata */ + return PMIX_SUCCESS; + } else { + // return not found as this proc doesn't belong to any psets + return PMIX_ERR_NOT_FOUND; + } + } + /* find the nspace object for the target proc */ nptr = NULL; PMIX_LIST_FOREACH (ns, &pmix_globals.nspaces, pmix_namespace_t) { @@ -703,7 +787,7 @@ static pmix_status_t get_job_data(char *nspace, } if (PMIX_PEER_IS_V1(cd->peer)) { /* if the client is using v1, then it expects the - * data returned to it as the rank followed by abyte object containing + * data returned to it as the rank followed by a byte object containing * a buffer - so we have to do a little gyration */ pmix_buffer_t xfer; PMIX_CONSTRUCT(&xfer, pmix_buffer_t); diff --git a/src/tools/Makefile.include b/src/tools/Makefile.include index 6aa9065833..a1027053d4 100644 --- a/src/tools/Makefile.include +++ 
b/src/tools/Makefile.include @@ -33,7 +33,8 @@ SUBDIRS += \ tools/pattrs \ tools/pquery \ tools/wrapper \ - tools/palloc + tools/palloc \ + tools/pctrl DIST_SUBDIRS += \ tools/pevent \ @@ -43,4 +44,5 @@ DIST_SUBDIRS += \ tools/pattrs \ tools/pquery \ tools/wrapper \ - tools/palloc + tools/palloc \ + tools/pctrl diff --git a/src/tools/pctrl/Makefile.am b/src/tools/pctrl/Makefile.am new file mode 100644 index 0000000000..8714c72395 --- /dev/null +++ b/src/tools/pctrl/Makefile.am @@ -0,0 +1,33 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2017-2020 Intel, Inc. All rights reserved. +# Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +if PMIX_INSTALL_BINARIES + +bin_PROGRAMS = pctrl + +dist_pmixdata_DATA = help-pctrl.txt + +endif # PMIX_INSTALL_BINARIES + +pctrl_SOURCES = pctrl.c +pctrl_LDADD = \ + $(top_builddir)/src/libpmix.la diff --git a/src/tools/pctrl/help-pctrl.txt b/src/tools/pctrl/help-pctrl.txt new file mode 100644 index 0000000000..ef054547f8 --- /dev/null +++ b/src/tools/pctrl/help-pctrl.txt @@ -0,0 +1,182 @@ +# -*- text -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017-2020 Intel, Inc. All rights reserved. +# Copyright (c) 2022-2023 Nanook Consulting. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for pctrl +# +[usage] +%s (%s) %s + +Usage: %s [OPTION]... +PMIx Job control tool + + +/***** General Options *****/ + +-h|--help This help message +-h|--help OPTION Help for the specified option +-v|--verbose Enable typical debug options +-V|--version Print version and exit + + --uri Specify the URI of the server to which we are to connect, or + the name of the file (specified as file:filename) that contains that info + --namespace Namespace of the daemon to which we should connect + --nspace Synonym for "namespace" + --pid PID of the daemon to which we should connect (int => PID or file:path + for file containing the PID) + --system-server-first First look for a system server and connect to it if found + --system-server-only Connect only to a system-level server + --tmpdir Set the root for the session directory tree + --wait-to-connect Delay specified number of seconds before trying to connect + --num-connect-retries Max number of times to try to connect + + --request-id String identifier for this job control request + --pause Pause the specified processes + --resume "Un-pause" the specified processes + --cancel Cancel the specified request ID ("all" => cancel all requests from this requestor) + --kill Force terminate the specified processes + --terminate Politely terminate the specified processes + --signal Provide the specified
processes with the given signal + --restart Restart the specified processes using the given checkpoint ID + --checkpoint Checkpoint the specified processes and assign the given ID to it + --pset Define a new pset (with the given name) whose membership is + comprised of the specified processes + --targets Comma-delimited list of target processes for the requested + job-control operation + +# +# CONNECTION OPTIONS +# +# +[uri] +Specify the URI of the server to which we are to connect, or the name of the file (specified as +file:filename) that contains that info +# +[num-connect-retries] +Max number of times to try to connect to the specified server (int) +# +[pid] +PID of the daemon to which we should connect (int => PID or file:path +for file containing the PID) +# +[namespace] +Namespace of the daemon we are to connect to (char*) +# +[nspace] +Namespace of the daemon we are to connect to (char*) - synonym for "namespace" +# +[system-server-first] +First look for a system server and connect to it if found +# +[system-server-only] +Connect only to a system-level server - abort if one is not found +# +[tmpdir] +Define the root location for the session directory tree where the +rendezvous files can be found. + +The rendezvous files contain connection information for a target +server and are located in the session directory tree. It may be necessary to point the +tool at the location where those files can be found if that location is other than the +expected default. + +The root of the session directory defaults to the system temporary directory +as defined in the environment using (in precedence order) the envars TMPDIR, TEMP, and +finally TMP. In the absence of any of those variables, PMIx will default to the "/tmp" +location. +# +[wait-to-connect] +Delay specified number of seconds before trying to connect +# +# PCTRL-SPECIFIC OPTIONS +# +[request-id] +String identifier for this job control request.
The request ID can be used for +subsequent query of request status and/or cancellation of the request. Note that +a request ID that matches a currently active request will be rejected - so care should be taken +to ensure that the ID provided is unique and not currently in use. +# +[pause] +Pause the specified processes. This typically takes the form of applying a SIGSTOP +to the specified processes. The pctrl tool will return a status indicating whether +or not the operation succeeded. +# +[resume] +Direct the specified processes to resume execution. This typically takes the form +of applying a SIGCONT signal to the specified processes. The pctrl tool will return +a status indicating whether or not the operation succeeded. +# +[cancel] +Cancel the specified request ID. The provided ID must match the ID provided to +a prior request - if the ID cannot be found, then a PMIX_ERR_NOT_FOUND status +shall be returned by pctrl. +# +[kill] +Force terminate the specified processes. Precise behavior depends upon the +runtime environment. However, typically the specified processes will receive +the following sequence of signals: + + * SIGCONT - wakeup a sleeping process + * SIGTERM - provide a trappable signal indicating that the process + should cleanly exit, if possible + * SIGKILL - forcibly terminate the process + +The pctrl tool will return a status indicating whether or not the operation +succeeded (i.e., all processes exited). +# +[terminate] +Politely terminate the specified processes. Precise behavior depends upon the +runtime environment. However, typically the specified processes will receive +the following sequence of signals: + + * SIGCONT - wakeup a sleeping process + * SIGTERM - provide a trappable signal indicating that the process + should cleanly exit, if possible + +The pctrl tool will return a status indicating whether or not the operation +succeeded (i.e., the processes terminated) +# +[signal] +Provide the specified processes with the given signal. 
Signals are to be +provided via their name (e.g., SIGTERM, SIGKILL) or an integer value +(e.g., -9). +# +[restart] +Restart the specified processes using the given checkpoint ID - i.e., an ID +previously assigned to a checkpoint of those processes via the "checkpoint" option. +# +[checkpoint] +Checkpoint the specified processes and assign the given ID to it. The checkpoint +operation will be conducted according to the method specified when the processes +were originally spawned. Support for this operation therefore depends both on the +capabilities of the runtime environment _and_ the application being told to +checkpoint. +# +[pset] +Define a new pset (with the given name) whose membership is comprised of the +specified processes. +# +[targets] +Comma-delimited list of target processes for the requested job-control +operation. Wildcard ranks (e.g., to apply the request to all processes +in the specified namespace) can be indicated with an asterisk ('*'). Syntax +requires that each process be identified as "nspace:rank". Note that +typical command line restrictions may necessitate the use of special +delimiters - e.g., "my\;weird.nspace:5". diff --git a/src/tools/pctrl/pctrl.c b/src/tools/pctrl/pctrl.c new file mode 100644 index 0000000000..ea14603461 --- /dev/null +++ b/src/tools/pctrl/pctrl.c @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Oak Ridge National Labs.
All rights reserved. + * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. + * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "pmix_config.h" +#include "include/pmix.h" +#include "pmix_common.h" +#include "include/pmix_server.h" + +#include +#include +#include +#include +#include + +#include "include/pmix_tool.h" +#include "src/common/pmix_attributes.h" +#include "src/mca/base/pmix_base.h" +#include "src/mca/pinstalldirs/base/base.h" +#include "src/runtime/pmix_init_util.h" +#include "src/runtime/pmix_rte.h" +#include "src/threads/pmix_threads.h" +#include "src/util/pmix_cmd_line.h" +#include "src/util/pmix_keyval_parse.h" +#include "src/util/pmix_printf.h" +#include "src/util/pmix_show_help.h" + +static struct option pctrlptions[] = { + PMIX_OPTION_SHORT_DEFINE(PMIX_CLI_HELP, PMIX_ARG_OPTIONAL, 'h'), + PMIX_OPTION_SHORT_DEFINE(PMIX_CLI_VERSION, PMIX_ARG_NONE, 'V'), + PMIX_OPTION_SHORT_DEFINE(PMIX_CLI_VERBOSE, PMIX_ARG_NONE, 'v'), + + PMIX_OPTION_DEFINE(PMIX_CLI_SYS_SERVER_FIRST, PMIX_ARG_NONE), + PMIX_OPTION_DEFINE(PMIX_CLI_SYS_SERVER_ONLY, PMIX_ARG_NONE), + PMIX_OPTION_DEFINE(PMIX_CLI_WAIT_TO_CONNECT, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_NUM_CONNECT_RETRIES, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_PID, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_NAMESPACE, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_NSPACE, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_URI, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_TMPDIR, PMIX_ARG_REQD), + + PMIX_OPTION_DEFINE(PMIX_CLI_REQ_ID, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_PAUSE, PMIX_ARG_NONE), + PMIX_OPTION_DEFINE(PMIX_CLI_RESUME, PMIX_ARG_NONE), + PMIX_OPTION_DEFINE(PMIX_CLI_CANCEL, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_KILL, PMIX_ARG_NONE), + PMIX_OPTION_DEFINE(PMIX_CLI_TERMINATE, PMIX_ARG_NONE), + 
PMIX_OPTION_DEFINE(PMIX_CLI_RESTART, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_CHKPT, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_PSET_NAME, PMIX_ARG_REQD), + // --signal is processed in main() and described in help-pctrl.txt, so it must be registered with the parser + PMIX_OPTION_DEFINE(PMIX_CLI_SIGNAL, PMIX_ARG_REQD), + PMIX_OPTION_DEFINE(PMIX_CLI_TARGETS, PMIX_ARG_REQD), + + PMIX_OPTION_END +}; +static char *pctrlshorts = "h::vV"; + +static pmix_status_t convert_procs(const char *vals, + pmix_data_array_t *array); +static int convert_signal(const char *val); + +static void cbfunc(pmix_status_t status, + pmix_info_t *info, size_t ninfo, + void *cbdata, + pmix_release_cbfunc_t release_fn, + void *release_cbdata) +{ + pmix_shift_caddy_t *req = (pmix_shift_caddy_t*)cbdata; + size_t n; + pmix_status_t rc; + + PMIX_ACQUIRE_OBJECT(req); + +pmix_output(0, "CALLBACK RECVD: %s", PMIx_Error_string(status)); + req->status = status; + if (PMIX_SUCCESS == status && 0 < ninfo) { + req->ninfo = ninfo; + PMIX_INFO_CREATE(req->info, req->ninfo); + for (n=0; n < ninfo; n++) { + PMIX_INFO_XFER(&req->info[n], &info[n]); + if (PMIX_CHECK_KEY(&info[n], PMIX_SESSION_ID)) { + PMIX_VALUE_GET_NUMBER(rc, &info[n].value, req->sessionid, uint32_t); + if (PMIX_SUCCESS != rc) { + req->status = rc; + } + } + } + } + + if (NULL != release_fn) { + release_fn(release_cbdata); + } + + PMIX_POST_OBJECT(req); + PMIX_WAKEUP_THREAD(&req->lock); +} +int main(int argc, char **argv) +{ + pmix_proc_t myproc, *targets = NULL; + pmix_status_t rc; + pmix_shift_caddy_t *req = NULL; + pmix_info_t *info; + pmix_data_array_t darray; + void *options; + pmix_cli_result_t results; + pmix_cli_item_t *opt; + size_t n, ntargets; + pmix_rank_t rank = 0; + char hostname[PMIX_PATH_MAX], *kptr; + bool donotwait = false; + int sigval; + char *key = NULL; + PMIX_HIDE_UNUSED_PARAMS(argc); + + /* protect against problems if someone passes us thru a pipe + * and then abnormally terminates the pipe early */ + signal(SIGPIPE, SIG_IGN); + + /* init globals */ + pmix_tool_basename = "pctrl"; + gethostname(hostname, sizeof(hostname)); + + if (PMIX_SUCCESS != pmix_init_util(NULL, 0, NULL))
{ + return PMIX_ERROR; + } + + /* Parse the command line options */ + PMIX_CONSTRUCT(&results, pmix_cli_result_t); + rc = pmix_cmd_line_parse(argv, pctrlshorts, pctrlptions, + NULL, &results, "help-pctrl.txt"); + + if (PMIX_SUCCESS != rc) { + if (PMIX_ERR_SILENT == rc) { + exit(rc); + } + if (PMIX_OPERATION_SUCCEEDED != rc) { + fprintf(stderr, "%s: command line error (%s)\n", argv[0], PMIx_Error_string(rc)); + exit(rc); + } + } + + // check for common required command line option + if (NULL == (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_TARGETS))) { + // must tell us the targets for the operation + exit(1); + } + // save the targets + // convert the provided string to an array of pmix_proc_t + rc = convert_procs(opt->values[0], &darray); + if (PMIX_SUCCESS != rc) { + // report the error + exit(rc); + } + targets = (pmix_proc_t*)darray.array; + ntargets = darray.size; + + // collect options for init + options = PMIx_Info_list_start(); + /* if we were given the pid of a starter, then direct that + * we connect to it */ + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_PID))) { + /* see if it is an integer value */ + char *leftover, *param; + pid_t pid; + leftover = NULL; + pid = strtol(opt->values[0], &leftover, 10); + if (NULL == leftover || 0 == strlen(leftover)) { + /* it is an integer */ + rc = PMIx_Info_list_add(options, PMIX_SERVER_PIDINFO, &pid, PMIX_PID); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + } else if (0 == strncasecmp(opt->values[0], "file", 4)) { + FILE *fp; + /* step over the file: prefix */ + param = strchr(opt->values[0], ':'); + if (NULL == param) { + /* malformed input */ + pmix_show_help("help-pquery.txt", "bad-option-input", true, pmix_tool_basename, + "--pid", opt->values[0], "file:path"); + PMIx_Info_list_release(options); + free(targets); + return PMIX_ERR_BAD_PARAM; + } + ++param; + fp = 
fopen(param, "r"); + if (NULL == fp) { + pmix_show_help("help-pquery.txt", "file-open-error", true, pmix_tool_basename, + "--pid", opt->values[0], param); + PMIx_Info_list_release(options); + free(targets); + return PMIX_ERR_BAD_PARAM; + } + rc = fscanf(fp, "%lu", (unsigned long *) &pid); + if (1 != rc) { + /* if we were unable to obtain the single conversion we + * require, then error out */ + pmix_show_help("help-pquery.txt", "bad-file", true, pmix_tool_basename, + "--pid", opt->values[0], param); + fclose(fp); + PMIx_Info_list_release(options); + free(targets); + return PMIX_ERR_BAD_PARAM; + } + fclose(fp); + rc = PMIx_Info_list_add(options, PMIX_SERVER_PIDINFO, &pid, PMIX_PID); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + } else { /* a string that's neither an integer nor starts with 'file:' */ + pmix_show_help("help-pquery.txt", "bad-option-input", true, + pmix_tool_basename, "--pid", + opt->values[0], "file:path"); + PMIx_Info_list_release(options); + free(targets); + return PMIX_ERR_BAD_PARAM; + } + + } else if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_NAMESPACE))) { + rc = PMIx_Info_list_add(options, PMIX_SERVER_NSPACE, opt->values[0], PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + + } else if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_NSPACE))) { + rc = PMIx_Info_list_add(options, PMIX_SERVER_NSPACE, opt->values[0], PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + + } else if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_URI))) { + rc = PMIx_Info_list_add(options, PMIX_SERVER_URI, opt->values[0], 
PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + + } else if (pmix_cmd_line_is_taken(&results, PMIX_CLI_SYS_SERVER_FIRST)) { + rc = PMIx_Info_list_add(options, PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + + } else if (pmix_cmd_line_is_taken(&results, PMIX_CLI_SYS_SERVER_ONLY)) { + rc = PMIx_Info_list_add(options, PMIX_CONNECT_TO_SYSTEM, NULL, PMIX_BOOL); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + } + + /* assign our own name */ + pmix_asprintf(&kptr, "%s.%s.%lu", pmix_tool_basename, hostname, (unsigned long)getpid()); + rc = PMIx_Info_list_add(options, PMIX_TOOL_NSPACE, kptr, PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + free(kptr); + rc = PMIx_Info_list_add(options, PMIX_TOOL_RANK, &rank, PMIX_PROC_RANK); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + free(targets); + exit(rc); + } + rc = PMIx_Info_list_convert(options, &darray); + info = (pmix_info_t *) darray.array; + n = darray.size; + PMIx_Info_list_release(options); + + /* init as a tool */ + rc = PMIx_tool_init(&myproc, info, n); + PMIX_INFO_FREE(info, n); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx_tool_init failed: %s\n", PMIx_Error_string(rc)); + free(targets); + exit(rc); + } + + /* construct the job control request */ + options = PMIx_Info_list_start(); + + if (NULL != (opt = pmix_cmd_line_get_param(&results, 
PMIX_CLI_REQ_ID))) { + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_ID, opt->values[0], PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_PAUSE))) { + key = "PAUSE"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_PAUSE, NULL, PMIX_BOOL); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_RESUME))) { + key = "RESUME"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_RESUME, NULL, PMIX_BOOL); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_CANCEL))) { + key = "CANCEL"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_CANCEL, opt->values[0], PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_KILL))) { + key = "KILL"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_KILL, NULL, PMIX_BOOL); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_RESTART))) { + key = "RESTART"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_RESTART, opt->values[0], PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, 
PMIX_CLI_CHKPT))) { + key = "CHECKPOINT"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_CHECKPOINT, opt->values[0], PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_TERMINATE))) { + key = "TERMINATE"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_TERMINATE, NULL, PMIX_BOOL); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_PSET_NAME))) { + key = "DEFINE PSET"; + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_DEFINE_PSET, opt->values[0], PMIX_STRING); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + if (NULL != (opt = pmix_cmd_line_get_param(&results, PMIX_CLI_SIGNAL))) { + key = "SIGNAL"; + sigval = convert_signal(opt->values[0]); + if (0 == sigval) { + // unrecognized signal + fprintf(stderr, "Unrecognized signal name: %s\n", opt->values[0]); + PMIx_Info_list_release(options); + rc = -1; + goto done; + } + rc = PMIx_Info_list_add(options, PMIX_JOB_CTRL_SIGNAL, &sigval, PMIX_INT); + if (PMIX_SUCCESS != rc) { + fprintf(stderr, "PMIx info list add failed: %s\n", PMIx_Error_string(rc)); + PMIx_Info_list_release(options); + goto done; + } + } + + req = PMIX_NEW(pmix_shift_caddy_t); + if (NULL != key) { + req->key = strdup(key); + } else { + req->key = strdup("N/A"); + } + rc = PMIx_Info_list_convert(options, &darray); + if (PMIX_ERR_EMPTY == rc) { + req->info = NULL; + req->ninfo = 0; + } else if (PMIX_SUCCESS != rc) { + PMIX_ERROR_LOG(rc); + goto done; + } else { + req->info = (pmix_info_t *) darray.array; + req->ninfo = darray.size; + } + PMIx_Info_list_release(options); + + rc = 
PMIx_Job_control_nb(targets, ntargets, + req->info, req->ninfo, + cbfunc, req); + if (PMIX_SUCCESS != rc) { + if (PMIX_OPERATION_SUCCEEDED == rc) { + fprintf(stderr, "Job control request %s granted\n", req->key); + PMIX_RELEASE(req); + rc = PMIX_SUCCESS; + goto done; + } + fprintf(stderr, "Job control request failed: %s\n", PMIx_Error_string(rc)); + goto done; + } + + if (donotwait) { + fprintf(stderr, "Job control request being processed\n"); + goto done; + } + + PMIX_WAIT_THREAD(&req->lock); + if (PMIX_SUCCESS == req->status) { + fprintf(stderr, "Job control %s granted\n", req->key); + } else { + fprintf(stderr, "Job control request failed: %s\n", PMIx_Error_string(req->status)); + } + +done: + if (NULL != req) { + PMIX_RELEASE(req); + } + if (NULL != targets) { + free(targets); + } + PMIx_tool_finalize(); + + return (rc); +} + +/* Convert a comma-delimited list of "nspace:rank" entries into a PMIX_PROC data array. + * A rank of '*' maps to PMIX_RANK_WILDCARD. Returns PMIX_ERR_BAD_PARAM if the list is + * empty or an entry lacks the ':' delimiter; on success the caller owns array->array. */ +static pmix_status_t convert_procs(const char *vals, + pmix_data_array_t *array) +{ + char **p, *r; + size_t cnt, n; + pmix_proc_t *procs; + + // split on any commas + p = PMIx_Argv_split(vals, ','); + if (NULL == p) { + // nothing to convert + return PMIX_ERR_BAD_PARAM; + } + // count how many we have + cnt = PMIx_Argv_count(p); + // validate every entry before allocating so a target without ':' cannot be dereferenced as NULL below + for (n=0; n < cnt; n++) { + if (NULL == strrchr(p[n], ':')) { + PMIx_Argv_free(p); + return PMIX_ERR_BAD_PARAM; + } + } + // setup the array + PMIx_Data_array_construct(array, cnt, PMIX_PROC); + // load the array + procs = (pmix_proc_t*)array->array; + for (n=0; n < cnt; n++) { + // find the nspace/rank delimiting ':' + r = strrchr(p[n], ':'); + *r = '\0'; + ++r; // step over the colon + PMIX_LOAD_NSPACE(procs[n].nspace, p[n]); + if ('*' == *r) { + procs[n].rank = PMIX_RANK_WILDCARD; + } else { + procs[n].rank = strtoul(r, NULL, 10); + } + } + // the nspace strings were copied into the array, so the split argv can be released + PMIx_Argv_free(p); + return PMIX_SUCCESS; + +} + +typedef struct { + char *name; + int value; +} pmix_signal_t; + +static pmix_signal_t sigs[] = { +#ifdef SIGHUP + {"SIGHUP", SIGHUP}, +#endif +#ifdef SIGABRT + {"SIGABRT", SIGABRT}, +#endif +#ifdef SIGALRM + {"SIGALRM", SIGALRM}, +#endif +#ifdef SIGKILL + {"SIGKILL", SIGKILL}, +#endif +#ifdef SIGPIPE + {"SIGPIPE", SIGPIPE}, +#endif +#ifdef SIGTERM + {"SIGTERM", SIGTERM}, +#endif +#ifdef SIGSTOP + {"SIGSTOP", SIGSTOP}, +#endif
+#ifdef SIGTSTP + {"SIGTSTP", SIGTSTP}, +#endif +#ifdef SIGCONT + {"SIGCONT", SIGCONT}, +#endif +#ifdef SIGCHLD + {"SIGCHLD", SIGCHLD}, +#endif +#ifdef SIGINFO + {"SIGINFO", SIGINFO}, +#endif +#ifdef SIGUSR1 + {"SIGUSR1", SIGUSR1}, +#endif +#ifdef SIGUSR2 + {"SIGUSR2", SIGUSR2}, +#endif +#ifdef SIGINT + {"SIGINT", SIGINT}, +#endif +#ifdef SIGTRAP + {"SIGTRAP", SIGTRAP}, +#endif + {NULL, 0} +}; + +/* Translate a signal given either by name (case-insensitive, e.g. "SIGTERM") or as an + * integer with an optional leading '-' (e.g. "-9") into its numeric value. + * Returns 0 if the value is not recognized. */ +static int convert_signal(const char *val) +{ + int n; + long num; + char *end; + const char *digits; + + n = 0; + while (NULL != sigs[n].name) { + if (0 == strcasecmp(val, sigs[n].name)) { + return sigs[n].value; + } + ++n; + } + // not a known name - accept the integer form described by the [signal] topic of help-pctrl.txt + digits = ('-' == val[0]) ? val + 1 : val; + num = strtol(digits, &end, 10); + // require at least one digit, no trailing characters, and a positive value that fits in an int + if (end != digits && '\0' == *end && 0 < num && (long)(int)num == num) { + return (int)num; + } + return 0; +} diff --git a/src/util/pmix_cmd_line.h b/src/util/pmix_cmd_line.h index 543e15c7fb..089a46bf49 100644 --- a/src/util/pmix_cmd_line.h +++ b/src/util/pmix_cmd_line.h @@ -145,6 +145,16 @@ PMIX_CLASS_DECLARATION(pmix_cli_result_t); #define PMIX_CLI_DEPENDENCY "dependency" // required, short is 'd' #define PMIX_CLI_DO_NOT_WAIT "do-not-wait" // none +// Job control options +#define PMIX_CLI_PAUSE "pause" // none +#define PMIX_CLI_RESUME "resume" // none +#define PMIX_CLI_CANCEL "cancel" // required +#define PMIX_CLI_KILL "kill" // none +#define PMIX_CLI_RESTART "restart" // required +#define PMIX_CLI_CHKPT "checkpoint" // required +#define PMIX_CLI_TARGETS "targets" // required +#define PMIX_CLI_TERMINATE "terminate" // none +#define PMIX_CLI_PSET_NAME "pset" // required typedef void (*pmix_cmd_line_store_fn_t)(const char *name, const char *option, pmix_cli_result_t *results);